/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 
11 #include <assert.h>
12 
13 #include "./vpx_dsp_rtcd.h"
14 #include "vpx_dsp/quantize.h"
15 #include "vpx_dsp/vpx_dsp_common.h"
16 #include "vpx_mem/vpx_mem.h"
17 
// Quantizes only the first (DC) coefficient of a block.
//
// Every one of the n_coeffs entries of qcoeff_ptr and dqcoeff_ptr is cleared
// first; afterwards only entry 0 is written. The magnitude of coeff_ptr[0] has
// round_ptr[0] added, is clamped to the int16_t range, multiplied by quant and
// shifted right by 16. The original sign is then restored.
//
// On return *eob_ptr is 1 when the quantized DC value is non-zero, else 0.
void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
                     const int16_t *round_ptr, const int16_t quant,
                     tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                     const int16_t dequant, uint16_t *eob_ptr) {
  const int dc = coeff_ptr[0];
  // Sign mask taken from the top bit; relies on an arithmetic right shift so
  // that it is -1 for negative inputs and 0 otherwise.
  const int sign_mask = dc >> 31;
  const int magnitude = (dc ^ sign_mask) - sign_mask;
  int level;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  level = clamp(magnitude + round_ptr[0], INT16_MIN, INT16_MAX);
  level = (level * quant) >> 16;

  // Re-apply the sign: (x ^ -1) - (-1) == -x, (x ^ 0) - 0 == x.
  qcoeff_ptr[0] = (level ^ sign_mask) - sign_mask;
  dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant;

  *eob_ptr = (level != 0) ? 1 : 0;
}
39 
40 #if CONFIG_VP9_HIGHBITDEPTH
// High bit-depth variant of the DC-only quantizer.
//
// Clears n_coeffs entries of both output buffers, then quantizes coeff_ptr[0]
// only. The intermediate product is carried in 64 bits and, unlike the
// non-highbd version, the rounded magnitude is not clamped to int16_t.
//
// On return *eob_ptr is 1 when the quantized DC value is non-zero, else 0.
void vpx_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
                            const int16_t *round_ptr, const int16_t quant,
                            tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                            const int16_t dequant, uint16_t *eob_ptr) {
  int dc, sign_mask, magnitude, level;
  int64_t rounded;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  dc = coeff_ptr[0];
  // Sign mask from the top bit (relies on arithmetic right shift).
  sign_mask = dc >> 31;
  magnitude = (dc ^ sign_mask) - sign_mask;

  // The addition is done in int and only then widened for the multiply.
  rounded = magnitude + round_ptr[0];
  level = (int)((rounded * quant) >> 16);

  qcoeff_ptr[0] = (tran_low_t)((level ^ sign_mask) - sign_mask);
  dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant;

  *eob_ptr = (level != 0) ? 1 : 0;
}
63 #endif
64 
// Quantizes only the first (DC) coefficient of a 1024-entry block.
//
// Both output buffers are cleared for 1024 entries; only entry 0 is written
// afterwards. Compared with vpx_quantize_dc, the rounding term is passed
// through ROUND_POWER_OF_TWO(..., 1), the quantizer shift is 15 instead of 16,
// and the dequantized value is divided by 2.
//
// On return *eob_ptr is 1 when the quantized DC value is non-zero, else 0.
void vpx_quantize_dc_32x32(const tran_low_t *coeff_ptr,
                           const int16_t *round_ptr, const int16_t quant,
                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                           const int16_t dequant, uint16_t *eob_ptr) {
  const int block_coeffs = 1024;
  const int dc = coeff_ptr[0];
  // Sign mask from the top bit (relies on arithmetic right shift).
  const int sign_mask = dc >> 31;
  const int magnitude = (dc ^ sign_mask) - sign_mask;
  int half_round;
  int level;

  memset(qcoeff_ptr, 0, block_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, block_coeffs * sizeof(*dqcoeff_ptr));

  half_round = ROUND_POWER_OF_TWO(round_ptr[0], 1);
  level = clamp(magnitude + half_round, INT16_MIN, INT16_MAX);
  level = (level * quant) >> 15;

  qcoeff_ptr[0] = (level ^ sign_mask) - sign_mask;
  dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant / 2;

  *eob_ptr = (level != 0) ? 1 : 0;
}
88 
89 #if CONFIG_VP9_HIGHBITDEPTH
// High bit-depth DC-only quantizer for a 1024-entry block.
//
// Clears 1024 entries of both output buffers, then quantizes coeff_ptr[0]
// only. The rounding term is passed through ROUND_POWER_OF_TWO(..., 1), the
// product is carried in 64 bits and shifted right by 15, and the dequantized
// value is divided by 2. No int16_t clamp is applied to the rounded magnitude.
//
// On return *eob_ptr is 1 when the quantized DC value is non-zero, else 0.
void vpx_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr,
                                  const int16_t *round_ptr, const int16_t quant,
                                  tran_low_t *qcoeff_ptr,
                                  tran_low_t *dqcoeff_ptr,
                                  const int16_t dequant, uint16_t *eob_ptr) {
  const int block_coeffs = 1024;
  int dc, sign_mask, magnitude, level;
  int64_t rounded;

  memset(qcoeff_ptr, 0, block_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, block_coeffs * sizeof(*dqcoeff_ptr));

  dc = coeff_ptr[0];
  // Sign mask from the top bit (relies on arithmetic right shift).
  sign_mask = dc >> 31;
  magnitude = (dc ^ sign_mask) - sign_mask;

  // The addition is done in int and only then widened for the multiply.
  rounded = magnitude + ROUND_POWER_OF_TWO(round_ptr[0], 1);
  level = (int)((rounded * quant) >> 15);

  qcoeff_ptr[0] = (tran_low_t)((level ^ sign_mask) - sign_mask);
  dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant / 2;

  *eob_ptr = (level != 0) ? 1 : 0;
}
114 #endif
115 
// Quantizes a block of n_coeffs coefficients, visiting them in `scan` order.
//
// The two-element parameter arrays (zbin, round, quant, quant_shift, dequant)
// are indexed with 0 for coefficient position 0 and 1 for every other
// position. Both output buffers are cleared first; a coefficient is quantized
// only when its magnitude is at least the matching zbin value.
//
// On return *eob_ptr is one more than the last scan index that produced a
// non-zero quantized value, or 0 if there was none. `iscan` is unused.
void vpx_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                      const int16_t *zbin_ptr, const int16_t *round_ptr,
                      const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
                      tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                      const int16_t *dequant_ptr, uint16_t *eob_ptr,
                      const int16_t *scan, const int16_t *iscan) {
  const int dead_zone[2] = { zbin_ptr[0], zbin_ptr[1] };
  int active = (int)n_coeffs;  // number of leading scan positions to process
  int last_nonzero = -1;
  int pos;
  (void)iscan;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  // Trim the tail: walk backwards through the scan and drop every trailing
  // coefficient that lies strictly inside (-zbin, zbin). Those would fail the
  // zbin test below anyway, so their outputs stay zero.
  while (active > 0) {
    const int rc = scan[active - 1];
    const int coeff = coeff_ptr[rc];
    const int zbin = dead_zone[rc != 0];

    if (coeff >= zbin || coeff <= -zbin) break;
    --active;
  }

  // Quantize the remaining positions. `active` may be zero, in which case
  // nothing is written and the end-of-block position is 0.
  for (pos = 0; pos < active; ++pos) {
    const int rc = scan[pos];
    const int is_ac = (rc != 0);
    const int coeff = coeff_ptr[rc];
    // Sign mask from the top bit (relies on arithmetic right shift).
    const int sign_mask = coeff >> 31;
    const int magnitude = (coeff ^ sign_mask) - sign_mask;
    int level;

    if (magnitude < dead_zone[is_ac]) continue;

    level = clamp(magnitude + round_ptr[is_ac], INT16_MIN, INT16_MAX);
    level = ((((level * quant_ptr[is_ac]) >> 16) + level) *
             quant_shift_ptr[is_ac]) >>
            16;

    qcoeff_ptr[rc] = (level ^ sign_mask) - sign_mask;
    dqcoeff_ptr[rc] = (tran_low_t)(qcoeff_ptr[rc] * dequant_ptr[is_ac]);

    if (level != 0) last_nonzero = pos;
  }

  *eob_ptr = last_nonzero + 1;
}
162 
163 #if CONFIG_VP9_HIGHBITDEPTH
// High bit-depth block quantizer; visits n_coeffs coefficients in `scan`
// order.
//
// The two-element parameter arrays (zbin, round, quant, quant_shift, dequant)
// are indexed with 0 for coefficient position 0 and 1 for every other
// position. Both output buffers are cleared first; a coefficient is quantized
// only when its magnitude is at least the matching zbin value. Intermediate
// products are carried in 64 bits and the rounded magnitude is not clamped.
//
// On return *eob_ptr is one more than the last scan index that produced a
// non-zero quantized value, or 0 if there was none. `iscan` is unused.
void vpx_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                             const int16_t *zbin_ptr, const int16_t *round_ptr,
                             const int16_t *quant_ptr,
                             const int16_t *quant_shift_ptr,
                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                             const int16_t *dequant_ptr, uint16_t *eob_ptr,
                             const int16_t *scan, const int16_t *iscan) {
  const int dead_zone[2] = { zbin_ptr[0], zbin_ptr[1] };
  int active = (int)n_coeffs;  // number of leading scan positions to process
  int last_nonzero = -1;
  int pos;
  (void)iscan;

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  // Trim the tail: walk backwards through the scan and drop every trailing
  // coefficient that lies strictly inside (-zbin, zbin). Those would fail the
  // zbin test below anyway, so their outputs stay zero.
  while (active > 0) {
    const int rc = scan[active - 1];
    const int coeff = coeff_ptr[rc];
    const int zbin = dead_zone[rc != 0];

    if (coeff >= zbin || coeff <= -zbin) break;
    --active;
  }

  // Quantize the remaining positions. `active` may be zero, in which case
  // nothing is written and the end-of-block position is 0.
  for (pos = 0; pos < active; ++pos) {
    const int rc = scan[pos];
    const int is_ac = (rc != 0);
    const int coeff = coeff_ptr[rc];
    // Sign mask from the top bit (relies on arithmetic right shift).
    const int sign_mask = coeff >> 31;
    const int magnitude = (coeff ^ sign_mask) - sign_mask;

    if (magnitude >= dead_zone[is_ac]) {
      // The addition is done in int and only then widened to 64 bits.
      const int64_t rounded = magnitude + round_ptr[is_ac];
      const int64_t scaled = ((rounded * quant_ptr[is_ac]) >> 16) + rounded;
      const int level = (int)((scaled * quant_shift_ptr[is_ac]) >> 16);

      qcoeff_ptr[rc] = (tran_low_t)((level ^ sign_mask) - sign_mask);
      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[is_ac];

      if (level != 0) last_nonzero = pos;
    }
  }

  *eob_ptr = last_nonzero + 1;
}
209 #endif
210 
// Quantizes a block of up to 1024 coefficients, visiting them in `scan` order.
//
// The two-element parameter arrays (zbin, round, quant, quant_shift, dequant)
// are indexed with 0 for coefficient position 0 and 1 for every other
// position. The zbin and round values are passed through
// ROUND_POWER_OF_TWO(..., 1), the final quantizer shift is 15, and the
// dequantized value is divided by 2.
//
// Both output buffers are cleared for n_coeffs entries. A first pass records
// the scan indices whose coefficient is outside (-zbin, zbin); a second pass
// quantizes only those.
//
// n_coeffs must not exceed 1024, the capacity of the on-stack index list
// (checked with assert, so only in builds without NDEBUG).
//
// On return *eob_ptr is one more than the last scan index that produced a
// non-zero quantized value, or 0 if there was none. `iscan` is unused.
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                            const int16_t *zbin_ptr, const int16_t *round_ptr,
                            const int16_t *quant_ptr,
                            const int16_t *quant_shift_ptr,
                            tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                            const int16_t *dequant_ptr, uint16_t *eob_ptr,
                            const int16_t *scan, const int16_t *iscan) {
  const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
                         ROUND_POWER_OF_TWO(zbin_ptr[1], 1) };
  const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };

  int idx = 0;
  int idx_arr[1024];
  int i, eob = -1;
  (void)iscan;

  // The pre-scan pass may store one entry per coefficient in idx_arr, so a
  // larger n_coeffs would write past the end of the array.
  assert(n_coeffs <= (intptr_t)(sizeof(idx_arr) / sizeof(idx_arr[0])));

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  // Pre-scan pass
  for (i = 0; i < n_coeffs; i++) {
    const int rc = scan[i];
    const int coeff = coeff_ptr[rc];

    // If the coefficient is out of the base ZBIN range, keep it for
    // quantization.
    if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0]) idx_arr[idx++] = i;
  }

  // Quantization pass: only process the coefficients selected in
  // pre-scan pass. Note: idx can be zero.
  for (i = 0; i < idx; i++) {
    const int rc = scan[idx_arr[i]];
    const int coeff = coeff_ptr[rc];
    // Sign mask from the top bit (relies on arithmetic right shift).
    const int coeff_sign = (coeff >> 31);
    int tmp;
    int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
    abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
    abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
    tmp = ((((abs_coeff * quant_ptr[rc != 0]) >> 16) + abs_coeff) *
           quant_shift_ptr[rc != 0]) >>
          15;

    qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
#if (VPX_ARCH_X86 || VPX_ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH
    // When tran_low_t is only 16 bits dqcoeff can outrange it. Rather than
    // truncating with a cast, saturate the value. This is easier to implement
    // on x86 and preserves the sign of the value.
    dqcoeff_ptr[rc] =
        clamp(qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2, INT16_MIN, INT16_MAX);
#else
    dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
#endif  // (VPX_ARCH_X86 || VPX_ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH

    if (tmp) eob = idx_arr[i];
  }
  *eob_ptr = eob + 1;
}
269 
270 #if CONFIG_VP9_HIGHBITDEPTH
// High bit-depth quantizer for a block of up to 1024 coefficients, visited in
// `scan` order.
//
// The two-element parameter arrays (zbin, round, quant, quant_shift, dequant)
// are indexed with 0 for coefficient position 0 and 1 for every other
// position. The zbin and round values are passed through
// ROUND_POWER_OF_TWO(..., 1), the final quantizer shift is 15, and the
// dequantized value is divided by 2. Intermediate products are carried in
// 64 bits and the rounded magnitude is not clamped.
//
// Both output buffers are cleared for n_coeffs entries. A first pass records
// the scan indices whose coefficient is outside (-zbin, zbin); a second pass
// quantizes only those.
//
// n_coeffs must not exceed 1024, the capacity of the on-stack index list
// (checked with assert, so only in builds without NDEBUG).
//
// On return *eob_ptr is one more than the last scan index that produced a
// non-zero quantized value, or 0 if there was none. `iscan` is unused.
void vpx_highbd_quantize_b_32x32_c(
    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
    const int16_t *round_ptr, const int16_t *quant_ptr,
    const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
    const int16_t *scan, const int16_t *iscan) {
  const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 1),
                         ROUND_POWER_OF_TWO(zbin_ptr[1], 1) };
  const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };

  int idx = 0;
  int idx_arr[1024];
  int i, eob = -1;
  (void)iscan;

  // The pre-scan pass may store one entry per coefficient in idx_arr, so a
  // larger n_coeffs would write past the end of the array.
  assert(n_coeffs <= (intptr_t)(sizeof(idx_arr) / sizeof(idx_arr[0])));

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  // Pre-scan pass
  for (i = 0; i < n_coeffs; i++) {
    const int rc = scan[i];
    const int coeff = coeff_ptr[rc];

    // If the coefficient is out of the base ZBIN range, keep it for
    // quantization.
    if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0]) idx_arr[idx++] = i;
  }

  // Quantization pass: only process the coefficients selected in
  // pre-scan pass. Note: idx can be zero.
  for (i = 0; i < idx; i++) {
    const int rc = scan[idx_arr[i]];
    const int coeff = coeff_ptr[rc];
    // Sign mask from the top bit (relies on arithmetic right shift).
    const int coeff_sign = (coeff >> 31);
    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
    // The addition is done in int and only then widened to 64 bits.
    const int64_t tmp1 = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
    const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
    const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);
    qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
    dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
    if (abs_qcoeff) eob = idx_arr[i];
  }
  *eob_ptr = eob + 1;
}
315 #endif
316