1 /************************************************************************
2 * Copyright (C) 2002-2009, Xiph.org Foundation
3 * Copyright (C) 2010, Robin Watts for Pinknoise Productions Ltd
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above
13 * copyright notice, this list of conditions and the following disclaimer
14 * in the documentation and/or other materials provided with the
15 * distribution.
16 * * Neither the names of the Xiph.org Foundation nor Pinknoise
17 * Productions Ltd nor the names of its contributors may be used to
18 * endorse or promote products derived from this software without
19 * specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 ************************************************************************
33
34 function: normalized modified discrete cosine transform
35 power of two length transform only [64 <= n ]
36 last mod: $Id: mdct.c,v 1.9.6.5 2003/04/29 04:03:27 xiphmont Exp $
37
38 Original algorithm adapted long ago from _The use of multirate filter
39 banks for coding of high quality digital audio_, by T. Sporer,
40 K. Brandenburg and B. Edler, collection of the European Signal
41 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
42 211-214
43
44 The below code implements an algorithm that no longer looks much like
45 that presented in the paper, but the basic structure remains if you
46 dig deep enough to see it.
47
48 This module DOES NOT INCLUDE code to generate/apply the window
49 function. Everybody has their own weird favorite including me... I
50 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
51 vehemently disagree.
52
53 ************************************************************************/
54
55 #include "ivorbiscodec.h"
56 #include "os.h"
57 #include "misc.h"
58 #include "mdct.h"
59 #include "mdct_lookup.h"
60
61 #include <stdio.h>
62
63 #if defined(ONLY_C)
presymmetry(DATA_TYPE * in,int n2,int step)64 STIN void presymmetry(DATA_TYPE *in,int n2,int step){
65 DATA_TYPE *aX;
66 DATA_TYPE *bX;
67 LOOKUP_T *T;
68 int n4=n2>>1;
69
70 aX = in+n2-3;
71 T = sincos_lookup0;
72
73 do{
74 REG_TYPE s0= aX[0];
75 REG_TYPE s2= aX[2];
76 XPROD31( s0, s2, T[0], T[1], &aX[0], &aX[2] ); T+=step;
77 aX-=4;
78 }while(aX>=in+n4);
79 do{
80 REG_TYPE s0= aX[0];
81 REG_TYPE s2= aX[2];
82 XPROD31( s0, s2, T[1], T[0], &aX[0], &aX[2] ); T-=step;
83 aX-=4;
84 }while(aX>=in);
85
86 aX = in+n2-4;
87 bX = in;
88 T = sincos_lookup0;
89 do{
90 REG_TYPE ri0= aX[0];
91 REG_TYPE ri2= aX[2];
92 REG_TYPE ro0= bX[0];
93 REG_TYPE ro2= bX[2];
94
95 XNPROD31( ro2, ro0, T[1], T[0], &aX[0], &aX[2] ); T+=step;
96 XNPROD31( ri2, ri0, T[0], T[1], &bX[0], &bX[2] );
97
98 aX-=4;
99 bX+=4;
100 }while(aX>=bX);
101 }
102
103 /* 8 point butterfly (in place) */
mdct_butterfly_8(DATA_TYPE * x)104 STIN void mdct_butterfly_8(DATA_TYPE *x){
105
106 REG_TYPE s0 = x[0] + x[1];
107 REG_TYPE s1 = x[0] - x[1];
108 REG_TYPE s2 = x[2] + x[3];
109 REG_TYPE s3 = x[2] - x[3];
110 REG_TYPE s4 = x[4] + x[5];
111 REG_TYPE s5 = x[4] - x[5];
112 REG_TYPE s6 = x[6] + x[7];
113 REG_TYPE s7 = x[6] - x[7];
114
115 x[0] = s5 + s3;
116 x[1] = s7 - s1;
117 x[2] = s5 - s3;
118 x[3] = s7 + s1;
119 x[4] = s4 - s0;
120 x[5] = s6 - s2;
121 x[6] = s4 + s0;
122 x[7] = s6 + s2;
123 MB();
124 }
125
126 /* 16 point butterfly (in place, 4 register) */
mdct_butterfly_16(DATA_TYPE * x)127 STIN void mdct_butterfly_16(DATA_TYPE *x){
128
129 REG_TYPE s0, s1, s2, s3;
130
131 s0 = x[ 8] - x[ 9]; x[ 8] += x[ 9];
132 s1 = x[10] - x[11]; x[10] += x[11];
133 s2 = x[ 1] - x[ 0]; x[ 9] = x[ 1] + x[0];
134 s3 = x[ 3] - x[ 2]; x[11] = x[ 3] + x[2];
135 x[ 0] = MULT31((s0 - s1) , cPI2_8);
136 x[ 1] = MULT31((s2 + s3) , cPI2_8);
137 x[ 2] = MULT31((s0 + s1) , cPI2_8);
138 x[ 3] = MULT31((s3 - s2) , cPI2_8);
139 MB();
140
141 s2 = x[12] - x[13]; x[12] += x[13];
142 s3 = x[14] - x[15]; x[14] += x[15];
143 s0 = x[ 4] - x[ 5]; x[13] = x[ 5] + x[ 4];
144 s1 = x[ 7] - x[ 6]; x[15] = x[ 7] + x[ 6];
145 x[ 4] = s2; x[ 5] = s1;
146 x[ 6] = s3; x[ 7] = s0;
147 MB();
148
149 mdct_butterfly_8(x);
150 mdct_butterfly_8(x+8);
151 }
152
153 /* 32 point butterfly (in place, 4 register) */
mdct_butterfly_32(DATA_TYPE * x)154 STIN void mdct_butterfly_32(DATA_TYPE *x){
155
156 REG_TYPE s0, s1, s2, s3;
157
158 s0 = x[16] - x[17]; x[16] += x[17];
159 s1 = x[18] - x[19]; x[18] += x[19];
160 s2 = x[ 1] - x[ 0]; x[17] = x[ 1] + x[ 0];
161 s3 = x[ 3] - x[ 2]; x[19] = x[ 3] + x[ 2];
162 XNPROD31( s0, s1, cPI3_8, cPI1_8, &x[ 0], &x[ 2] );
163 XPROD31 ( s2, s3, cPI1_8, cPI3_8, &x[ 1], &x[ 3] );
164 MB();
165
166 s0 = x[20] - x[21]; x[20] += x[21];
167 s1 = x[22] - x[23]; x[22] += x[23];
168 s2 = x[ 5] - x[ 4]; x[21] = x[ 5] + x[ 4];
169 s3 = x[ 7] - x[ 6]; x[23] = x[ 7] + x[ 6];
170 x[ 4] = MULT31((s0 - s1) , cPI2_8);
171 x[ 5] = MULT31((s3 + s2) , cPI2_8);
172 x[ 6] = MULT31((s0 + s1) , cPI2_8);
173 x[ 7] = MULT31((s3 - s2) , cPI2_8);
174 MB();
175
176 s0 = x[24] - x[25]; x[24] += x[25];
177 s1 = x[26] - x[27]; x[26] += x[27];
178 s2 = x[ 9] - x[ 8]; x[25] = x[ 9] + x[ 8];
179 s3 = x[11] - x[10]; x[27] = x[11] + x[10];
180 XNPROD31( s0, s1, cPI1_8, cPI3_8, &x[ 8], &x[10] );
181 XPROD31 ( s2, s3, cPI3_8, cPI1_8, &x[ 9], &x[11] );
182 MB();
183
184 s0 = x[28] - x[29]; x[28] += x[29];
185 s1 = x[30] - x[31]; x[30] += x[31];
186 s2 = x[12] - x[13]; x[29] = x[13] + x[12];
187 s3 = x[15] - x[14]; x[31] = x[15] + x[14];
188 x[12] = s0; x[13] = s3;
189 x[14] = s1; x[15] = s2;
190 MB();
191
192 mdct_butterfly_16(x);
193 mdct_butterfly_16(x+16);
194 }
195
196 /* N/stage point generic N stage butterfly (in place, 2 register) */
mdct_butterfly_generic(DATA_TYPE * x,int points,int step)197 STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){
198 LOOKUP_T *T = sincos_lookup0;
199 DATA_TYPE *x1 = x + points - 4;
200 DATA_TYPE *x2 = x + (points>>1) - 4;
201 REG_TYPE s0, s1, s2, s3;
202
203 do{
204 s0 = x1[0] - x1[1]; x1[0] += x1[1];
205 s1 = x1[3] - x1[2]; x1[2] += x1[3];
206 s2 = x2[1] - x2[0]; x1[1] = x2[1] + x2[0];
207 s3 = x2[3] - x2[2]; x1[3] = x2[3] + x2[2];
208 XPROD31( s1, s0, T[0], T[1], &x2[0], &x2[2] );
209 XPROD31( s2, s3, T[0], T[1], &x2[1], &x2[3] ); T+=step;
210 x1-=4;
211 x2-=4;
212 }while(T<sincos_lookup0+1024);
213 x1 = x + (points>>1) + (points>>2) - 4;
214 x2 = x + (points>>2) - 4;
215 T = sincos_lookup0+1024;
216 do{
217 s0 = x1[0] - x1[1]; x1[0] += x1[1];
218 s1 = x1[2] - x1[3]; x1[2] += x1[3];
219 s2 = x2[0] - x2[1]; x1[1] = x2[1] + x2[0];
220 s3 = x2[3] - x2[2]; x1[3] = x2[3] + x2[2];
221 XNPROD31( s0, s1, T[0], T[1], &x2[0], &x2[2] );
222 XNPROD31( s3, s2, T[0], T[1], &x2[1], &x2[3] ); T-=step;
223 x1-=4;
224 x2-=4;
225 }while(T>sincos_lookup0);
226 }
227
mdct_butterflies(DATA_TYPE * x,int points,int shift)228 STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){
229
230 int stages=7-shift;
231 int i,j;
232
233 for(i=0;--stages>=0;i++){
234 for(j=0;j<(1<<i);j++)
235 {
236 mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift));
237 }
238 }
239
240 for(j=0;j<points;j+=32)
241 mdct_butterfly_32(x+j);
242 }
243
244 static unsigned char bitrev[16]={0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
245
bitrev12(int x)246 STIN int bitrev12(int x){
247 return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);
248 }
249
mdct_bitreverse(DATA_TYPE * x,int n,int shift)250 STIN void mdct_bitreverse(DATA_TYPE *x,int n,int shift){
251 int bit = 0;
252 DATA_TYPE *w = x+(n>>1);
253
254 do{
255 DATA_TYPE b = bitrev12(bit++);
256 DATA_TYPE *xx = x + (b>>shift);
257 REG_TYPE r;
258
259 w -= 2;
260
261 if(w>xx){
262
263 r = xx[0];
264 xx[0] = w[0];
265 w[0] = r;
266
267 r = xx[1];
268 xx[1] = w[1];
269 w[1] = r;
270 }
271 }while(w>x);
272 }
273
mdct_step7(DATA_TYPE * x,int n,int step)274 STIN void mdct_step7(DATA_TYPE *x,int n,int step){
275 DATA_TYPE *w0 = x;
276 DATA_TYPE *w1 = x+(n>>1);
277 LOOKUP_T *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
278 LOOKUP_T *Ttop = T+1024;
279 REG_TYPE s0, s1, s2, s3;
280
281 do{
282 w1 -= 2;
283
284 s0 = w0[0] + w1[0];
285 s1 = w1[1] - w0[1];
286 s2 = MULT32(s0, T[1]) + MULT32(s1, T[0]);
287 s3 = MULT32(s1, T[1]) - MULT32(s0, T[0]);
288 T+=step;
289
290 s0 = (w0[1] + w1[1])>>1;
291 s1 = (w0[0] - w1[0])>>1;
292 w0[0] = s0 + s2;
293 w0[1] = s1 + s3;
294 w1[0] = s0 - s2;
295 w1[1] = s3 - s1;
296
297 w0 += 2;
298 }while(T<Ttop);
299 do{
300 w1 -= 2;
301
302 s0 = w0[0] + w1[0];
303 s1 = w1[1] - w0[1];
304 T-=step;
305 s2 = MULT32(s0, T[0]) + MULT32(s1, T[1]);
306 s3 = MULT32(s1, T[0]) - MULT32(s0, T[1]);
307
308 s0 = (w0[1] + w1[1])>>1;
309 s1 = (w0[0] - w1[0])>>1;
310 w0[0] = s0 + s2;
311 w0[1] = s1 + s3;
312 w1[0] = s0 - s2;
313 w1[1] = s3 - s1;
314
315 w0 += 2;
316 }while(w0<w1);
317 }
318 #endif
319
mdct_step8(DATA_TYPE * x,int n,int step)320 STIN void mdct_step8(DATA_TYPE *x, int n, int step){
321 LOOKUP_T *T;
322 LOOKUP_T *V;
323 DATA_TYPE *iX =x+(n>>1);
324
325 switch(step) {
326 #if defined(ONLY_C)
327 default:
328 T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
329 do{
330 REG_TYPE s0 = x[0];
331 REG_TYPE s1 = -x[1];
332 XPROD31( s0, s1, T[0], T[1], x, x+1); T+=step;
333 x +=2;
334 }while(x<iX);
335 break;
336 #endif
337
338 case 1:
339 {
340 /* linear interpolation between table values: offset=0.5, step=1 */
341 REG_TYPE t0,t1,v0,v1,s0,s1;
342 T = sincos_lookup0;
343 V = sincos_lookup1;
344 t0 = (*T++)>>1;
345 t1 = (*T++)>>1;
346 do{
347 s0 = x[0];
348 s1 = -x[1];
349 t0 += (v0 = (*V++)>>1);
350 t1 += (v1 = (*V++)>>1);
351 XPROD31( s0, s1, t0, t1, x, x+1 );
352
353 s0 = x[2];
354 s1 = -x[3];
355 v0 += (t0 = (*T++)>>1);
356 v1 += (t1 = (*T++)>>1);
357 XPROD31( s0, s1, v0, v1, x+2, x+3 );
358
359 x += 4;
360 }while(x<iX);
361 break;
362 }
363
364 case 0:
365 {
366 /* linear interpolation between table values: offset=0.25, step=0.5 */
367 REG_TYPE t0,t1,v0,v1,q0,q1,s0,s1;
368 T = sincos_lookup0;
369 V = sincos_lookup1;
370 t0 = *T++;
371 t1 = *T++;
372 do{
373
374
375 v0 = *V++;
376 v1 = *V++;
377 t0 += (q0 = (v0-t0)>>2);
378 t1 += (q1 = (v1-t1)>>2);
379 s0 = x[0];
380 s1 = -x[1];
381 XPROD31( s0, s1, t0, t1, x, x+1 );
382 t0 = v0-q0;
383 t1 = v1-q1;
384 s0 = x[2];
385 s1 = -x[3];
386 XPROD31( s0, s1, t0, t1, x+2, x+3 );
387
388 t0 = *T++;
389 t1 = *T++;
390 v0 += (q0 = (t0-v0)>>2);
391 v1 += (q1 = (t1-v1)>>2);
392 s0 = x[4];
393 s1 = -x[5];
394 XPROD31( s0, s1, v0, v1, x+4, x+5 );
395 v0 = t0-q0;
396 v1 = t1-q1;
397 s0 = x[6];
398 s1 = -x[7];
399 XPROD31( s0, s1, v0, v1, x+5, x+6 );
400
401 x+=8;
402 }while(x<iX);
403 break;
404 }
405 }
406 }
407
408 extern int mdct_backwardARM(int n, DATA_TYPE *in);
409
410 /* partial; doesn't perform last-step deinterleave/unrolling. That
411 can be done more efficiently during pcm output */
mdct_backward(int n,DATA_TYPE * in)412 void mdct_backward(int n, DATA_TYPE *in){
413 int step;
414
415 #if defined(ONLY_C)
416 int shift;
417
418 for (shift=4;!(n&(1<<shift));shift++);
419 shift=13-shift;
420 step=2<<shift;
421
422 presymmetry(in,n>>1,step);
423 mdct_butterflies(in,n>>1,shift);
424 mdct_bitreverse(in,n,shift);
425 mdct_step7(in,n,step);
426 mdct_step8(in,n,step>>2);
427 #else
428 step = mdct_backwardARM(n, in);
429 if (step <= 1)
430 mdct_step8(in,n,step);
431 #endif
432 }
433
434 #if defined(ONLY_C)
mdct_shift_right(int n,DATA_TYPE * in,DATA_TYPE * right)435 void mdct_shift_right(int n, DATA_TYPE *in, DATA_TYPE *right){
436 int i;
437 n>>=2;
438 in+=1;
439
440 for(i=0;i<n;i++)
441 right[i]=in[i<<1];
442 }
443 #endif
444
445 extern ogg_int16_t *mdct_unroll_prelap(ogg_int16_t *out,
446 DATA_TYPE *post,
447 DATA_TYPE *l,
448 int step);
449 extern ogg_int16_t *mdct_unroll_part2(ogg_int16_t *out,
450 DATA_TYPE *post,
451 DATA_TYPE *l,
452 DATA_TYPE *r,
453 int step,
454 LOOKUP_T *wL,
455 LOOKUP_T *wR);
456 extern ogg_int16_t *mdct_unroll_part3(ogg_int16_t *out,
457 DATA_TYPE *post,
458 DATA_TYPE *l,
459 DATA_TYPE *r,
460 int step,
461 LOOKUP_T *wL,
462 LOOKUP_T *wR);
463 extern ogg_int16_t *mdct_unroll_postlap(ogg_int16_t *out,
464 DATA_TYPE *post,
465 DATA_TYPE *l,
466 int step);
467
mdct_unroll_lap(int n0,int n1,int lW,int W,DATA_TYPE * in,DATA_TYPE * right,LOOKUP_T * w0,LOOKUP_T * w1,ogg_int16_t * out,int step,int start,int end)468 void mdct_unroll_lap(int n0,int n1,
469 int lW,int W,
470 DATA_TYPE *in,
471 DATA_TYPE *right,
472 LOOKUP_T *w0,
473 LOOKUP_T *w1,
474 ogg_int16_t *out,
475 int step,
476 int start, /* samples, this frame */
477 int end /* samples, this frame */){
478
479 DATA_TYPE *l=in+(W&&lW ? n1>>1 : n0>>1);
480 DATA_TYPE *r=right+(lW ? n1>>2 : n0>>2);
481 DATA_TYPE *post;
482 LOOKUP_T *wR=(W && lW ? w1+(n1>>1) : w0+(n0>>1));
483 LOOKUP_T *wL=(W && lW ? w1 : w0 );
484
485 int preLap=(lW && !W ? (n1>>2)-(n0>>2) : 0 );
486 int halfLap=(lW && W ? (n1>>2) : (n0>>2) );
487 int postLap=(!lW && W ? (n1>>2)-(n0>>2) : 0 );
488 int n,off;
489
490 /* preceeding direct-copy lapping from previous frame, if any */
491 if(preLap){
492 n = (end<preLap?end:preLap);
493 off = (start<preLap?start:preLap);
494 post = r-n;
495 r -= off;
496 start -= off;
497 end -= n;
498 #if defined(ONLY_C)
499 while(r>post){
500 *out = CLIP_TO_15((*--r)>>9);
501 out+=step;
502 }
503 #else
504 out = mdct_unroll_prelap(out,post,r,step);
505 n -= off;
506 if (n < 0)
507 n = 0;
508 r -= n;
509 #endif
510 }
511
512 /* cross-lap; two halves due to wrap-around */
513 n = (end<halfLap?end:halfLap);
514 off = (start<halfLap?start:halfLap);
515 post = r-n;
516 r -= off;
517 l -= off*2;
518 start -= off;
519 wR -= off;
520 wL += off;
521 end -= n;
522 #if defined(ONLY_C)
523 while(r>post){
524 l-=2;
525 *out = CLIP_TO_15((MULT31(*--r,*--wR) + MULT31(*l,*wL++))>>9);
526 out+=step;
527 }
528 #else
529 out = mdct_unroll_part2(out, post, l, r, step, wL, wR);
530 n -= off;
531 if (n < 0)
532 n = 0;
533 l -= 2*n;
534 r -= n;
535 wR -= n;
536 wL += n;
537 #endif
538
539 n = (end<halfLap?end:halfLap);
540 off = (start<halfLap?start:halfLap);
541 post = r+n;
542 r += off;
543 l += off*2;
544 start -= off;
545 end -= n;
546 wR -= off;
547 wL += off;
548 #if defined(ONLY_C)
549 while(r<post){
550 *out = CLIP_TO_15((MULT31(*r++,*--wR) - MULT31(*l,*wL++))>>9);
551 out+=step;
552 l+=2;
553 }
554 #else
555 out = mdct_unroll_part3(out, post, l, r, step, wL, wR);
556 n -= off;
557 if (n < 0)
558 n = 0;
559 l += 2*n;
560 r += n;
561 wR -= n;
562 wL += n;
563 #endif
564
565 /* preceeding direct-copy lapping from previous frame, if any */
566 if(postLap){
567 n = (end<postLap?end:postLap);
568 off = (start<postLap?start:postLap);
569 post = l+n*2;
570 l += off*2;
571 #if defined(ONLY_C)
572 while(l<post){
573 *out = CLIP_TO_15((-*l)>>9);
574 out+=step;
575 l+=2;
576 }
577 #else
578 out = mdct_unroll_postlap(out,post,l,step);
579 #endif
580 }
581 }
582
583