1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
4 * *
5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
6 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * *
9 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2003 *
10 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
11 * *
12 ********************************************************************
13
14 function: normalized modified discrete cosine transform
15 power of two length transform only [64 <= n ]
16 last mod: $Id: mdct.c,v 1.9.6.5 2003/04/29 04:03:27 xiphmont Exp $
17
18 Original algorithm adapted long ago from _The use of multirate filter
19 banks for coding of high quality digital audio_, by T. Sporer,
20 K. Brandenburg and B. Edler, collection of the European Signal
21 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
22 211-214
23
24 The below code implements an algorithm that no longer looks much like
25 that presented in the paper, but the basic structure remains if you
26 dig deep enough to see it.
27
28 This module DOES NOT INCLUDE code to generate/apply the window
29 function. Everybody has their own weird favorite including me... I
30 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
31 vehemently disagree.
32
33 ********************************************************************/
34
35 #include "ivorbiscodec.h"
36 #include "os.h"
37 #include "misc.h"
38 #include "mdct.h"
39 #include "mdct_lookup.h"
40
presymmetry(DATA_TYPE * in,int n2,int step)41 STIN void presymmetry(DATA_TYPE *in,int n2,int step){
42 DATA_TYPE *aX;
43 DATA_TYPE *bX;
44 LOOKUP_T *T;
45 int n4=n2>>1;
46
47 aX = in+n2-3;
48 T = sincos_lookup0;
49
50 do{
51 REG_TYPE r0= aX[0];
52 REG_TYPE r2= aX[2];
53 XPROD31( r0, r2, T[0], T[1], &aX[0], &aX[2] ); T+=step;
54 aX-=4;
55 }while(aX>=in+n4);
56 do{
57 REG_TYPE r0= aX[0];
58 REG_TYPE r2= aX[2];
59 XPROD31( r0, r2, T[1], T[0], &aX[0], &aX[2] ); T-=step;
60 aX-=4;
61 }while(aX>=in);
62
63 aX = in+n2-4;
64 bX = in;
65 T = sincos_lookup0;
66 do{
67 REG_TYPE ri0= aX[0];
68 REG_TYPE ri2= aX[2];
69 REG_TYPE ro0= bX[0];
70 REG_TYPE ro2= bX[2];
71
72 XNPROD31( ro2, ro0, T[1], T[0], &aX[0], &aX[2] ); T+=step;
73 XNPROD31( ri2, ri0, T[0], T[1], &bX[0], &bX[2] );
74
75 aX-=4;
76 bX+=4;
77 }while(aX>=in+n4);
78
79 }
80
81 /* 8 point butterfly (in place) */
mdct_butterfly_8(DATA_TYPE * x)82 STIN void mdct_butterfly_8(DATA_TYPE *x){
83
84 REG_TYPE r0 = x[0] + x[1];
85 REG_TYPE r1 = x[0] - x[1];
86 REG_TYPE r2 = x[2] + x[3];
87 REG_TYPE r3 = x[2] - x[3];
88 REG_TYPE r4 = x[4] + x[5];
89 REG_TYPE r5 = x[4] - x[5];
90 REG_TYPE r6 = x[6] + x[7];
91 REG_TYPE r7 = x[6] - x[7];
92
93 x[0] = r5 + r3;
94 x[1] = r7 - r1;
95 x[2] = r5 - r3;
96 x[3] = r7 + r1;
97 x[4] = r4 - r0;
98 x[5] = r6 - r2;
99 x[6] = r4 + r0;
100 x[7] = r6 + r2;
101 MB();
102 }
103
104 /* 16 point butterfly (in place, 4 register) */
mdct_butterfly_16(DATA_TYPE * x)105 STIN void mdct_butterfly_16(DATA_TYPE *x){
106
107 REG_TYPE r0, r1, r2, r3;
108
109 r0 = x[ 8] - x[ 9]; x[ 8] += x[ 9];
110 r1 = x[10] - x[11]; x[10] += x[11];
111 r2 = x[ 1] - x[ 0]; x[ 9] = x[ 1] + x[0];
112 r3 = x[ 3] - x[ 2]; x[11] = x[ 3] + x[2];
113 x[ 0] = MULT31((r0 - r1) , cPI2_8);
114 x[ 1] = MULT31((r2 + r3) , cPI2_8);
115 x[ 2] = MULT31((r0 + r1) , cPI2_8);
116 x[ 3] = MULT31((r3 - r2) , cPI2_8);
117 MB();
118
119 r2 = x[12] - x[13]; x[12] += x[13];
120 r3 = x[14] - x[15]; x[14] += x[15];
121 r0 = x[ 4] - x[ 5]; x[13] = x[ 5] + x[ 4];
122 r1 = x[ 7] - x[ 6]; x[15] = x[ 7] + x[ 6];
123 x[ 4] = r2; x[ 5] = r1;
124 x[ 6] = r3; x[ 7] = r0;
125 MB();
126
127 mdct_butterfly_8(x);
128 mdct_butterfly_8(x+8);
129 }
130
131 /* 32 point butterfly (in place, 4 register) */
mdct_butterfly_32(DATA_TYPE * x)132 STIN void mdct_butterfly_32(DATA_TYPE *x){
133
134 REG_TYPE r0, r1, r2, r3;
135
136 r0 = x[16] - x[17]; x[16] += x[17];
137 r1 = x[18] - x[19]; x[18] += x[19];
138 r2 = x[ 1] - x[ 0]; x[17] = x[ 1] + x[ 0];
139 r3 = x[ 3] - x[ 2]; x[19] = x[ 3] + x[ 2];
140 XNPROD31( r0, r1, cPI3_8, cPI1_8, &x[ 0], &x[ 2] );
141 XPROD31 ( r2, r3, cPI1_8, cPI3_8, &x[ 1], &x[ 3] );
142 MB();
143
144 r0 = x[20] - x[21]; x[20] += x[21];
145 r1 = x[22] - x[23]; x[22] += x[23];
146 r2 = x[ 5] - x[ 4]; x[21] = x[ 5] + x[ 4];
147 r3 = x[ 7] - x[ 6]; x[23] = x[ 7] + x[ 6];
148 x[ 4] = MULT31((r0 - r1) , cPI2_8);
149 x[ 5] = MULT31((r3 + r2) , cPI2_8);
150 x[ 6] = MULT31((r0 + r1) , cPI2_8);
151 x[ 7] = MULT31((r3 - r2) , cPI2_8);
152 MB();
153
154 r0 = x[24] - x[25]; x[24] += x[25];
155 r1 = x[26] - x[27]; x[26] += x[27];
156 r2 = x[ 9] - x[ 8]; x[25] = x[ 9] + x[ 8];
157 r3 = x[11] - x[10]; x[27] = x[11] + x[10];
158 XNPROD31( r0, r1, cPI1_8, cPI3_8, &x[ 8], &x[10] );
159 XPROD31 ( r2, r3, cPI3_8, cPI1_8, &x[ 9], &x[11] );
160 MB();
161
162 r0 = x[28] - x[29]; x[28] += x[29];
163 r1 = x[30] - x[31]; x[30] += x[31];
164 r2 = x[12] - x[13]; x[29] = x[13] + x[12];
165 r3 = x[15] - x[14]; x[31] = x[15] + x[14];
166 x[12] = r0; x[13] = r3;
167 x[14] = r1; x[15] = r2;
168 MB();
169
170 mdct_butterfly_16(x);
171 mdct_butterfly_16(x+16);
172 }
173
174 /* N/stage point generic N stage butterfly (in place, 2 register) */
mdct_butterfly_generic(DATA_TYPE * x,int points,int step)175 STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){
176
177 LOOKUP_T *T = sincos_lookup0;
178 DATA_TYPE *x1 = x + points - 4;
179 DATA_TYPE *x2 = x + (points>>1) - 4;
180 REG_TYPE r0, r1, r2, r3;
181
182 do{
183 r0 = x1[0] - x1[1]; x1[0] += x1[1];
184 r1 = x1[3] - x1[2]; x1[2] += x1[3];
185 r2 = x2[1] - x2[0]; x1[1] = x2[1] + x2[0];
186 r3 = x2[3] - x2[2]; x1[3] = x2[3] + x2[2];
187 XPROD31( r1, r0, T[0], T[1], &x2[0], &x2[2] );
188 XPROD31( r2, r3, T[0], T[1], &x2[1], &x2[3] ); T+=step;
189 x1-=4;
190 x2-=4;
191 }while(T<sincos_lookup0+1024);
192 do{
193 r0 = x1[0] - x1[1]; x1[0] += x1[1];
194 r1 = x1[2] - x1[3]; x1[2] += x1[3];
195 r2 = x2[0] - x2[1]; x1[1] = x2[1] + x2[0];
196 r3 = x2[3] - x2[2]; x1[3] = x2[3] + x2[2];
197 XNPROD31( r0, r1, T[0], T[1], &x2[0], &x2[2] );
198 XNPROD31( r3, r2, T[0], T[1], &x2[1], &x2[3] ); T-=step;
199 x1-=4;
200 x2-=4;
201 }while(T>sincos_lookup0);
202 }
203
mdct_butterflies(DATA_TYPE * x,int points,int shift)204 STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){
205
206 int stages=8-shift;
207 int i,j;
208
209 for(i=0;--stages>0;i++){
210 for(j=0;j<(1<<i);j++)
211 mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift));
212 }
213
214 for(j=0;j<points;j+=32)
215 mdct_butterfly_32(x+j);
216 }
217
218 static unsigned char bitrev[16]={0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
219
bitrev12(int x)220 STIN int bitrev12(int x){
221 return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);
222 }
223
mdct_bitreverse(DATA_TYPE * x,int n,int shift)224 STIN void mdct_bitreverse(DATA_TYPE *x,int n,int shift){
225 int bit = 0;
226 DATA_TYPE *w = x+(n>>1);
227
228 do{
229 DATA_TYPE b = bitrev12(bit++);
230 DATA_TYPE *xx = x + (b>>shift);
231 REG_TYPE r;
232
233 w -= 2;
234
235 if(w>xx){
236
237 r = xx[0];
238 xx[0] = w[0];
239 w[0] = r;
240
241 r = xx[1];
242 xx[1] = w[1];
243 w[1] = r;
244 }
245 }while(w>x);
246 }
247
mdct_step7(DATA_TYPE * x,int n,int step)248 STIN void mdct_step7(DATA_TYPE *x,int n,int step){
249 DATA_TYPE *w0 = x;
250 DATA_TYPE *w1 = x+(n>>1);
251 LOOKUP_T *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
252 LOOKUP_T *Ttop = T+1024;
253 REG_TYPE r0, r1, r2, r3;
254
255 do{
256 w1 -= 2;
257
258 r0 = w0[0] + w1[0];
259 r1 = w1[1] - w0[1];
260 r2 = MULT32(r0, T[1]) + MULT32(r1, T[0]);
261 r3 = MULT32(r1, T[1]) - MULT32(r0, T[0]);
262 T+=step;
263
264 r0 = (w0[1] + w1[1])>>1;
265 r1 = (w0[0] - w1[0])>>1;
266 w0[0] = r0 + r2;
267 w0[1] = r1 + r3;
268 w1[0] = r0 - r2;
269 w1[1] = r3 - r1;
270
271 w0 += 2;
272 }while(T<Ttop);
273 do{
274 w1 -= 2;
275
276 r0 = w0[0] + w1[0];
277 r1 = w1[1] - w0[1];
278 T-=step;
279 r2 = MULT32(r0, T[0]) + MULT32(r1, T[1]);
280 r3 = MULT32(r1, T[0]) - MULT32(r0, T[1]);
281
282 r0 = (w0[1] + w1[1])>>1;
283 r1 = (w0[0] - w1[0])>>1;
284 w0[0] = r0 + r2;
285 w0[1] = r1 + r3;
286 w1[0] = r0 - r2;
287 w1[1] = r3 - r1;
288
289 w0 += 2;
290 }while(w0<w1);
291 }
292
mdct_step8(DATA_TYPE * x,int n,int step)293 STIN void mdct_step8(DATA_TYPE *x, int n, int step){
294 LOOKUP_T *T;
295 LOOKUP_T *V;
296 DATA_TYPE *iX =x+(n>>1);
297 step>>=2;
298
299 switch(step) {
300 default:
301 T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
302 do{
303 REG_TYPE r0 = x[0];
304 REG_TYPE r1 = -x[1];
305 XPROD31( r0, r1, T[0], T[1], x, x+1); T+=step;
306 x +=2;
307 }while(x<iX);
308 break;
309
310 case 1:
311 {
312 /* linear interpolation between table values: offset=0.5, step=1 */
313 REG_TYPE t0,t1,v0,v1,r0,r1;
314 T = sincos_lookup0;
315 V = sincos_lookup1;
316 t0 = (*T++)>>1;
317 t1 = (*T++)>>1;
318 do{
319 r0 = x[0];
320 r1 = -x[1];
321 t0 += (v0 = (*V++)>>1);
322 t1 += (v1 = (*V++)>>1);
323 XPROD31( r0, r1, t0, t1, x, x+1 );
324
325 r0 = x[2];
326 r1 = -x[3];
327 v0 += (t0 = (*T++)>>1);
328 v1 += (t1 = (*T++)>>1);
329 XPROD31( r0, r1, v0, v1, x+2, x+3 );
330
331 x += 4;
332 }while(x<iX);
333 break;
334 }
335
336 case 0:
337 {
338 /* linear interpolation between table values: offset=0.25, step=0.5 */
339 REG_TYPE t0,t1,v0,v1,q0,q1,r0,r1;
340 T = sincos_lookup0;
341 V = sincos_lookup1;
342 t0 = *T++;
343 t1 = *T++;
344 do{
345
346
347 v0 = *V++;
348 v1 = *V++;
349 t0 += (q0 = (v0-t0)>>2);
350 t1 += (q1 = (v1-t1)>>2);
351 r0 = x[0];
352 r1 = -x[1];
353 XPROD31( r0, r1, t0, t1, x, x+1 );
354 t0 = v0-q0;
355 t1 = v1-q1;
356 r0 = x[2];
357 r1 = -x[3];
358 XPROD31( r0, r1, t0, t1, x+2, x+3 );
359
360 t0 = *T++;
361 t1 = *T++;
362 v0 += (q0 = (t0-v0)>>2);
363 v1 += (q1 = (t1-v1)>>2);
364 r0 = x[4];
365 r1 = -x[5];
366 XPROD31( r0, r1, v0, v1, x+4, x+5 );
367 v0 = t0-q0;
368 v1 = t1-q1;
369 r0 = x[6];
370 r1 = -x[7];
371 XPROD31( r0, r1, v0, v1, x+5, x+6 );
372
373 x+=8;
374 }while(x<iX);
375 break;
376 }
377 }
378 }
379
380 /* partial; doesn't perform last-step deinterleave/unrolling. That
381 can be done more efficiently during pcm output */
mdct_backward(int n,DATA_TYPE * in)382 void mdct_backward(int n, DATA_TYPE *in){
383 int shift;
384 int step;
385
386 for (shift=4;!(n&(1<<shift));shift++);
387 shift=13-shift;
388 step=2<<shift;
389
390 presymmetry(in,n>>1,step);
391 mdct_butterflies(in,n>>1,shift);
392 mdct_bitreverse(in,n,shift);
393 mdct_step7(in,n,step);
394 mdct_step8(in,n,step);
395 }
396
mdct_shift_right(int n,DATA_TYPE * in,DATA_TYPE * right)397 void mdct_shift_right(int n, DATA_TYPE *in, DATA_TYPE *right){
398 int i;
399 n>>=2;
400 in+=1;
401
402 for(i=0;i<n;i++)
403 right[i]=in[i<<1];
404 }
405
mdct_unroll_lap(int n0,int n1,int lW,int W,DATA_TYPE * in,DATA_TYPE * right,LOOKUP_T * w0,LOOKUP_T * w1,ogg_int16_t * out,int step,int start,int end)406 void mdct_unroll_lap(int n0,int n1,
407 int lW,int W,
408 DATA_TYPE *in,
409 DATA_TYPE *right,
410 LOOKUP_T *w0,
411 LOOKUP_T *w1,
412 ogg_int16_t *out,
413 int step,
414 int start, /* samples, this frame */
415 int end /* samples, this frame */){
416
417 DATA_TYPE *l=in+(W&&lW ? n1>>1 : n0>>1);
418 DATA_TYPE *r=right+(lW ? n1>>2 : n0>>2);
419 DATA_TYPE *post;
420 LOOKUP_T *wR=(W && lW ? w1+(n1>>1) : w0+(n0>>1));
421 LOOKUP_T *wL=(W && lW ? w1 : w0 );
422
423 int preLap=(lW && !W ? (n1>>2)-(n0>>2) : 0 );
424 int halfLap=(lW && W ? (n1>>2) : (n0>>2) );
425 int postLap=(!lW && W ? (n1>>2)-(n0>>2) : 0 );
426 int n,off;
427
428 /* preceeding direct-copy lapping from previous frame, if any */
429 if(preLap){
430 n = (end<preLap?end:preLap);
431 off = (start<preLap?start:preLap);
432 post = r-n;
433 r -= off;
434 start -= off;
435 end -= n;
436 while(r>post){
437 *out = CLIP_TO_15((*--r)>>9);
438 out+=step;
439 }
440 }
441
442 /* cross-lap; two halves due to wrap-around */
443 n = (end<halfLap?end:halfLap);
444 off = (start<halfLap?start:halfLap);
445 post = r-n;
446 r -= off;
447 l -= off*2;
448 start -= off;
449 wR -= off;
450 wL += off;
451 end -= n;
452 while(r>post){
453 l-=2;
454 *out = CLIP_TO_15((MULT31(*--r,*--wR) + MULT31(*l,*wL++))>>9);
455 out+=step;
456 }
457
458 n = (end<halfLap?end:halfLap);
459 off = (start<halfLap?start:halfLap);
460 post = r+n;
461 r += off;
462 l += off*2;
463 start -= off;
464 end -= n;
465 wR -= off;
466 wL += off;
467 while(r<post){
468 *out = CLIP_TO_15((MULT31(*r++,*--wR) - MULT31(*l,*wL++))>>9);
469 out+=step;
470 l+=2;
471 }
472
473 /* preceeding direct-copy lapping from previous frame, if any */
474 if(postLap){
475 n = (end<postLap?end:postLap);
476 off = (start<postLap?start:postLap);
477 post = l+n*2;
478 l += off*2;
479 while(l<post){
480 *out = CLIP_TO_15((-*l)>>9);
481 out+=step;
482 l+=2;
483 }
484 }
485 }
486
487