1 /******************************************************************************
2 *
3 * Copyright 2014 The Android Open Source Project
4 * Copyright 2003 - 2004 Open Interface North America, Inc. All rights
5 * reserved.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at:
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 ******************************************************************************/
20
21 /*******************************************************************************
22 $Revision: #1 $
23 ******************************************************************************/
24
25 /** @file
26 @ingroup codec_internal
27 */
28
/**@addtogroup codec_internal*/
30 /**@{*/
31
32 /*
33 * Performs an 8-point Type-II scaled DCT using the Arai-Agui-Nakajima
34 * factorization. The scaling factors are folded into the windowing
35 * constants. 29 adds and 5 16x32 multiplies per 8 samples.
36 */
37
38 #include "oi_codec_sbc_private.h"
39
/*
 * Multiplier constants of the AAN factorization, stored as S1.30 fixed-point
 * integers (real value * 2^30, rounded to nearest).
 */

/* 0.707107 ~= cos(4*pi/16) */
#define AAN_C4_FIX (759250125)

/* 0.382683 ~= cos(6*pi/16) */
#define AAN_C6_FIX (410903207)

/* 0.541196 ~= cos(2*pi/16) - cos(6*pi/16) */
#define AAN_Q0_FIX (581104888)

/* 1.306563 ~= cos(2*pi/16) + cos(6*pi/16) */
#define AAN_Q1_FIX (1402911301)

/** Scales x by y bits to the right, adding a rounding factor.
 *
 * The rounding factor is half of the discarded range, i.e. 2^(y-1). It is
 * computed as (1u << y) >> 1 so that y == 0 yields a rounding factor of 0
 * (and SCALE(x, 0) == x) instead of evaluating the undefined shift 1 << -1.
 * For y in 1..31 the result is identical to ((x) + (1 << (y - 1))) >> (y).
 *
 * Both arguments may be evaluated more than once; do not pass expressions
 * with side effects.
 */
#ifndef SCALE
#define SCALE(x, y) (((x) + (int)((1u << (y)) >> 1)) >> (y))
#endif
53
54 /**
55 * Default C language implementation of a 32x32->32 multiply. This function may
56 * be replaced by a platform-specific version for speed.
57 *
58 * @param u A signed 32-bit multiplicand
59 * @param v A signed 32-bit multiplier
60
61 * @return A signed 32-bit value corresponding to the 32 most significant bits
62 * of the 64-bit product of u and v.
63 */
default_mul_32s_32s_hi(int32_t u,int32_t v)64 INLINE int32_t default_mul_32s_32s_hi(int32_t u, int32_t v) {
65 uint32_t u0, v0;
66 int32_t u1, v1, w1, w2, t;
67
68 u0 = u & 0xFFFF;
69 u1 = u >> 16;
70 v0 = v & 0xFFFF;
71 v1 = v >> 16;
72 t = u0 * v0;
73 t = u1 * v0 + ((uint32_t)t >> 16);
74 w1 = t & 0xFFFF;
75 w2 = t >> 16;
76 w1 = u0 * v1 + w1;
77 return u1 * v1 + w2 + (w1 >> 16);
78 }
79
80 #define MUL_32S_32S_HI(_x, _y) default_mul_32s_32s_hi(_x, _y)
81
#ifdef DEBUG_DCT

/*
 * Fallback values for the floating-point AAN constants used by the reference
 * implementation below. They are referenced by float_dct2_8() but are not
 * defined anywhere in this file; the #ifndef guards keep any definition that
 * an included header may already supply.
 * NOTE(review): confirm whether oi_codec_sbc_private.h provides these.
 */
#ifndef AAN_C4_FLOAT
#define AAN_C4_FLOAT 0.7071067811865475244f
#endif
#ifndef AAN_C6_FLOAT
#define AAN_C6_FLOAT 0.3826834323650897717f
#endif
#ifndef AAN_Q0_FLOAT
#define AAN_Q0_FLOAT 0.5411961001461969844f
#endif
#ifndef AAN_Q1_FLOAT
#define AAN_Q1_FLOAT 1.3065629648763765279f
#endif

/**
 * Floating-point reference version of dct2_8(), compiled only when DEBUG_DCT
 * is defined. It performs the same sequence of steps as the fixed-point code
 * in double precision and asserts after every step that the intermediate
 * value would fit in an int32 (and that every output would fit in an int16).
 *
 * NOTE(review): uses floor(); presumably <math.h> is pulled in by the private
 * header in debug builds - confirm.
 *
 * @param out Receives the 8 outputs as floats.
 * @param in  The 8 input samples; only in[0]..in[7] are read.
 */
PRIVATE void float_dct2_8(float* RESTRICT out, int32_t const* RESTRICT in) {
/* Quantize constant x to `bits` fractional bits, returned as a float. */
#define FIX(x, bits)                                     \
  (((int)floor(0.5f + ((x) * ((float)(1 << (bits)))))) / \
   ((float)(1 << (bits))))
/* x becomes x + y, y becomes (old x) - (old y); both results are checked. */
#define FLOAT_BUTTERFLY(x, y)  \
  do {                         \
    (x) += (y);                \
    (y) = (x) - ((y) * 2);     \
    OI_ASSERT(VALID_INT32(x)); \
    OI_ASSERT(VALID_INT32(y)); \
  } while (0)
/* Multiply a sample by constant K quantized to 20 fractional bits. */
#define FLOAT_MULT_DCT(K, sample) (FIX(K, 20) * (sample))
/* Divide x by 2^y without rounding. */
#define FLOAT_SCALE(x, y) (((x) / (double)(1 << (y))))

  double L00, L01, L02, L03, L04, L05, L06, L07;
  double L25;

  double in0, in1, in2, in3;
  double in4, in5, in6, in7;

  /* Input scaling. */
  in0 = FLOAT_SCALE(in[0], DCTII_8_SHIFT_IN);
  OI_ASSERT(VALID_INT32(in0));
  in1 = FLOAT_SCALE(in[1], DCTII_8_SHIFT_IN);
  OI_ASSERT(VALID_INT32(in1));
  in2 = FLOAT_SCALE(in[2], DCTII_8_SHIFT_IN);
  OI_ASSERT(VALID_INT32(in2));
  in3 = FLOAT_SCALE(in[3], DCTII_8_SHIFT_IN);
  OI_ASSERT(VALID_INT32(in3));
  in4 = FLOAT_SCALE(in[4], DCTII_8_SHIFT_IN);
  OI_ASSERT(VALID_INT32(in4));
  in5 = FLOAT_SCALE(in[5], DCTII_8_SHIFT_IN);
  OI_ASSERT(VALID_INT32(in5));
  in6 = FLOAT_SCALE(in[6], DCTII_8_SHIFT_IN);
  OI_ASSERT(VALID_INT32(in6));
  in7 = FLOAT_SCALE(in[7], DCTII_8_SHIFT_IN);
  OI_ASSERT(VALID_INT32(in7));

  /* First stage: sums of mirrored inputs feed the even outputs ... */
  L00 = (in0 + in7);
  OI_ASSERT(VALID_INT32(L00));
  L01 = (in1 + in6);
  OI_ASSERT(VALID_INT32(L01));
  L02 = (in2 + in5);
  OI_ASSERT(VALID_INT32(L02));
  L03 = (in3 + in4);
  OI_ASSERT(VALID_INT32(L03));

  /* ... and differences feed the odd outputs. */
  L04 = (in3 - in4);
  OI_ASSERT(VALID_INT32(L04));
  L05 = (in2 - in5);
  OI_ASSERT(VALID_INT32(L05));
  L06 = (in1 - in6);
  OI_ASSERT(VALID_INT32(L06));
  L07 = (in0 - in7);
  OI_ASSERT(VALID_INT32(L07));

  /* Even half: outputs 0, 4, 2 and 6. */
  FLOAT_BUTTERFLY(L00, L03);
  FLOAT_BUTTERFLY(L01, L02);

  L02 += L03;
  OI_ASSERT(VALID_INT32(L02));

  L02 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L02);
  OI_ASSERT(VALID_INT32(L02));

  FLOAT_BUTTERFLY(L00, L01);

  out[0] = (float)FLOAT_SCALE(L00, DCTII_8_SHIFT_0);
  OI_ASSERT(VALID_INT16(out[0]));
  out[4] = (float)FLOAT_SCALE(L01, DCTII_8_SHIFT_4);
  OI_ASSERT(VALID_INT16(out[4]));

  FLOAT_BUTTERFLY(L03, L02);
  out[6] = (float)FLOAT_SCALE(L02, DCTII_8_SHIFT_6);
  OI_ASSERT(VALID_INT16(out[6]));
  out[2] = (float)FLOAT_SCALE(L03, DCTII_8_SHIFT_2);
  OI_ASSERT(VALID_INT16(out[2]));

  /* Odd half: outputs 3, 5, 7 and 1. */
  L04 += L05;
  OI_ASSERT(VALID_INT32(L04));
  L05 += L06;
  OI_ASSERT(VALID_INT32(L05));
  L06 += L07;
  OI_ASSERT(VALID_INT32(L06));

  /* Halve the odd-path terms; compensated by the "- 1" in the final scales. */
  L04 /= 2;
  L05 /= 2;
  L06 /= 2;
  L07 /= 2;

  L05 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L05);
  OI_ASSERT(VALID_INT32(L05));

  L25 = L06 - L04;
  OI_ASSERT(VALID_INT32(L25));
  L25 = FLOAT_MULT_DCT(AAN_C6_FLOAT, L25);
  OI_ASSERT(VALID_INT32(L25));

  L04 = FLOAT_MULT_DCT(AAN_Q0_FLOAT, L04);
  OI_ASSERT(VALID_INT32(L04));
  L04 -= L25;
  OI_ASSERT(VALID_INT32(L04));

  L06 = FLOAT_MULT_DCT(AAN_Q1_FLOAT, L06);
  OI_ASSERT(VALID_INT32(L06));
  L06 -= L25;
  /* Check the value that was just updated (L06), not L25 again. */
  OI_ASSERT(VALID_INT32(L06));

  FLOAT_BUTTERFLY(L07, L05);

  FLOAT_BUTTERFLY(L05, L04);
  out[3] = (float)(FLOAT_SCALE(L04, DCTII_8_SHIFT_3 - 1));
  OI_ASSERT(VALID_INT16(out[3]));
  out[5] = (float)(FLOAT_SCALE(L05, DCTII_8_SHIFT_5 - 1));
  OI_ASSERT(VALID_INT16(out[5]));

  FLOAT_BUTTERFLY(L07, L06);
  out[7] = (float)(FLOAT_SCALE(L06, DCTII_8_SHIFT_7 - 1));
  OI_ASSERT(VALID_INT16(out[7]));
  out[1] = (float)(FLOAT_SCALE(L07, DCTII_8_SHIFT_1 - 1));
  OI_ASSERT(VALID_INT16(out[1]));

/* Drop the helper macros so they do not leak into the rest of the file. */
#undef FLOAT_SCALE
#undef FLOAT_MULT_DCT
#undef FLOAT_BUTTERFLY
#undef FIX
}
#endif
203
/*
 * This function calculates the AAN DCT. Its inputs are in S16.15 format, as
 * returned by OI_SBC_Dequant. In practice, abs(in[x]) < 52429.0 / 1.38
 * (1244918057 integer). The function it computes is an approximation to the
 * array defined by:
 *
 * diag(aan_s) * AAN = C2
 *
 * or
 *
 * AAN = diag(1/aan_s) * C2
 *
 * where C2 is the 8-point Type-II DCT matrix referred to in the comment at
 * the head of this file, and
 *
 * aan_s[i] = 1/(2*cos(i*pi/16)) for i = 1..7, and aan_s[0] = 1:
 *
 * aan_s[i] = [ 1.000 0.510 0.541 0.601 0.707 0.900 1.307 2.563 ]
 *
 * The output ranges are derived as follows:
 *
 * Let Y[0..7] = AAN * X[0..7]
 *
 * Without loss of generality, assume the input vector X consists of elements
 * between -1 and 1. The maximum possible value of a given output element occurs
 * with some particular combination of input vector elements each of which is -1
 * or 1. Consider the computation of Y[i]: Y[i] = sum over t=0..7 of
 * AAN[i,t]*X[t]. Y[i] is maximized if the sign of each X[t] matches the sign
 * of AAN[i,t], ensuring a positive contribution to the sum. Equivalently, one
 * may simply sum abs(AAN[i,t]) over t to get the maximum possible value of
 * Y[i].
 *
 * This yields approximately:
 * [8.00 10.05 9.66 8.52 8.00 5.70 4.00 2.00]
 *
 * Given the maximum magnitude sensible input value of +/-37992, this yields the
 * following vector of maximum output magnitudes:
 *
 * [ 303936 381820 367003 323692 303936 216555 151968 75984 ]
 *
 * Ultimately, these values must fit into 16 bit signed integers, so they must
 * be scaled. A non-uniform scaling helps maximize the kept precision. The
 * relative number of extra bits of precision maintainable with respect to the
 * largest value is given here:
 *
 * [ 0 0 0 0 0 0 1 2 ]
 *
 */
dct2_8(SBC_BUFFER_T * RESTRICT out,int32_t const * RESTRICT in)250 PRIVATE void dct2_8(SBC_BUFFER_T* RESTRICT out, int32_t const* RESTRICT in) {
251 #define BUTTERFLY(x, y) \
252 x += (y); \
253 (y) = (x) - ((y) << 1);
254 #define FIX_MULT_DCT(K, x) (MUL_32S_32S_HI(K, x) << 2)
255
256 int32_t L00, L01, L02, L03, L04, L05, L06, L07;
257 int32_t L25;
258
259 int32_t in0, in1, in2, in3;
260 int32_t in4, in5, in6, in7;
261
262 #if DCTII_8_SHIFT_IN != 0
263 in0 = SCALE(in[0], DCTII_8_SHIFT_IN);
264 in1 = SCALE(in[1], DCTII_8_SHIFT_IN);
265 in2 = SCALE(in[2], DCTII_8_SHIFT_IN);
266 in3 = SCALE(in[3], DCTII_8_SHIFT_IN);
267 in4 = SCALE(in[4], DCTII_8_SHIFT_IN);
268 in5 = SCALE(in[5], DCTII_8_SHIFT_IN);
269 in6 = SCALE(in[6], DCTII_8_SHIFT_IN);
270 in7 = SCALE(in[7], DCTII_8_SHIFT_IN);
271 #else
272 in0 = in[0];
273 in1 = in[1];
274 in2 = in[2];
275 in3 = in[3];
276 in4 = in[4];
277 in5 = in[5];
278 in6 = in[6];
279 in7 = in[7];
280 #endif
281
282 L00 = in0 + in7;
283 L01 = in1 + in6;
284 L02 = in2 + in5;
285 L03 = in3 + in4;
286
287 L04 = in3 - in4;
288 L05 = in2 - in5;
289 L06 = in1 - in6;
290 L07 = in0 - in7;
291
292 BUTTERFLY(L00, L03);
293 BUTTERFLY(L01, L02);
294
295 L02 += L03;
296
297 L02 = FIX_MULT_DCT(AAN_C4_FIX, L02);
298
299 BUTTERFLY(L00, L01);
300
301 out[0] = (int16_t)SCALE(L00, DCTII_8_SHIFT_0);
302 out[4] = (int16_t)SCALE(L01, DCTII_8_SHIFT_4);
303
304 BUTTERFLY(L03, L02);
305 out[6] = (int16_t)SCALE(L02, DCTII_8_SHIFT_6);
306 out[2] = (int16_t)SCALE(L03, DCTII_8_SHIFT_2);
307
308 L04 += L05;
309 L05 += L06;
310 L06 += L07;
311
312 L04 /= 2;
313 L05 /= 2;
314 L06 /= 2;
315 L07 /= 2;
316
317 L05 = FIX_MULT_DCT(AAN_C4_FIX, L05);
318
319 L25 = L06 - L04;
320 L25 = FIX_MULT_DCT(AAN_C6_FIX, L25);
321
322 L04 = FIX_MULT_DCT(AAN_Q0_FIX, L04);
323 L04 -= L25;
324
325 L06 = FIX_MULT_DCT(AAN_Q1_FIX, L06);
326 L06 -= L25;
327
328 BUTTERFLY(L07, L05);
329
330 BUTTERFLY(L05, L04);
331 out[3] = (int16_t)SCALE(L04, DCTII_8_SHIFT_3 - 1);
332 out[5] = (int16_t)SCALE(L05, DCTII_8_SHIFT_5 - 1);
333
334 BUTTERFLY(L07, L06);
335 out[7] = (int16_t)SCALE(L06, DCTII_8_SHIFT_7 - 1);
336 out[1] = (int16_t)SCALE(L07, DCTII_8_SHIFT_1 - 1);
337 #undef BUTTERFLY
338
339 #ifdef DEBUG_DCT
340 {
341 float float_out[8];
342 float_dct2_8(float_out, in);
343 }
344 #endif
345 }
346
347 /**@}*/
348