1 /************************************************************************
2 * Copyright (C) 2002-2009, Xiph.org Foundation
3 * Copyright (C) 2010, Robin Watts for Pinknoise Productions Ltd
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above
13 * copyright notice, this list of conditions and the following disclaimer
14 * in the documentation and/or other materials provided with the
15 * distribution.
16 * * Neither the names of the Xiph.org Foundation nor Pinknoise
17 * Productions Ltd nor the names of its contributors may be used to
18 * endorse or promote products derived from this software without
19 * specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 ************************************************************************
33
34 function: normalized modified discrete cosine transform
35 power of two length transform only [64 <= n ]
36 last mod: $Id: mdct.c,v 1.9.6.5 2003/04/29 04:03:27 xiphmont Exp $
37
38 Original algorithm adapted long ago from _The use of multirate filter
39 banks for coding of high quality digital audio_, by T. Sporer,
40 K. Brandenburg and B. Edler, collection of the European Signal
41 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
42 211-214
43
44 The below code implements an algorithm that no longer looks much like
45 that presented in the paper, but the basic structure remains if you
46 dig deep enough to see it.
47
48 This module DOES NOT INCLUDE code to generate/apply the window
49 function. Everybody has their own weird favorite including me... I
50 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
51 vehemently disagree.
52
53 ************************************************************************/
54
55 #include "ivorbiscodec.h"
56 #include "os.h"
57 #include "misc.h"
58 #include "mdct.h"
59 #include "mdct_lookup.h"
60
61 #include <stdio.h>
62
63 #if defined(ONLY_C)
presymmetry(DATA_TYPE * in,int n2,int step)64 STIN void presymmetry(DATA_TYPE *in,int n2,int step){
65 DATA_TYPE *aX;
66 DATA_TYPE *bX;
67 LOOKUP_T *T;
68 int n4=n2>>1;
69
70 aX = in+n2-3;
71 T = sincos_lookup0;
72
73 do{
74 REG_TYPE s0= aX[0];
75 REG_TYPE s2= aX[2];
76 XPROD31( s0, s2, T[0], T[1], &aX[0], &aX[2] ); T+=step;
77 aX-=4;
78 }while(aX>=in+n4);
79 do{
80 REG_TYPE s0= aX[0];
81 REG_TYPE s2= aX[2];
82 XPROD31( s0, s2, T[1], T[0], &aX[0], &aX[2] ); T-=step;
83 aX-=4;
84 }while(aX>=in);
85
86 aX = in+n2-4;
87 bX = in;
88 T = sincos_lookup0;
89 do{
90 REG_TYPE ri0= aX[0];
91 REG_TYPE ri2= aX[2];
92 REG_TYPE ro0= bX[0];
93 REG_TYPE ro2= bX[2];
94
95 XNPROD31( ro2, ro0, T[1], T[0], &aX[0], &aX[2] ); T+=step;
96 XNPROD31( ri2, ri0, T[0], T[1], &bX[0], &bX[2] );
97
98 aX-=4;
99 bX+=4;
100 }while(aX>=bX);
101 }
102
103 __attribute__((no_sanitize("signed-integer-overflow")))
104 /* 8 point butterfly (in place) */
mdct_butterfly_8(DATA_TYPE * x)105 STIN void mdct_butterfly_8(DATA_TYPE *x){
106
107 REG_TYPE s0 = x[0] + x[1];
108 REG_TYPE s1 = x[0] - x[1];
109 REG_TYPE s2 = x[2] + x[3];
110 REG_TYPE s3 = x[2] - x[3];
111 REG_TYPE s4 = x[4] + x[5];
112 REG_TYPE s5 = x[4] - x[5];
113 REG_TYPE s6 = x[6] + x[7];
114 REG_TYPE s7 = x[6] - x[7];
115
116 x[0] = s5 + s3;
117 x[1] = s7 - s1;
118 x[2] = s5 - s3;
119 x[3] = s7 + s1;
120 x[4] = s4 - s0;
121 x[5] = s6 - s2;
122 x[6] = s4 + s0;
123 x[7] = s6 + s2;
124 MB();
125 }
126
127 __attribute__((no_sanitize("signed-integer-overflow")))
128 /* 16 point butterfly (in place, 4 register) */
mdct_butterfly_16(DATA_TYPE * x)129 STIN void mdct_butterfly_16(DATA_TYPE *x){
130
131 REG_TYPE s0, s1, s2, s3;
132
133 s0 = x[ 8] - x[ 9]; x[ 8] += x[ 9];
134 s1 = x[10] - x[11]; x[10] += x[11];
135 s2 = x[ 1] - x[ 0]; x[ 9] = x[ 1] + x[0];
136 s3 = x[ 3] - x[ 2]; x[11] = x[ 3] + x[2];
137 x[ 0] = MULT31((s0 - s1) , cPI2_8);
138 x[ 1] = MULT31((s2 + s3) , cPI2_8);
139 x[ 2] = MULT31((s0 + s1) , cPI2_8);
140 x[ 3] = MULT31((s3 - s2) , cPI2_8);
141 MB();
142
143 s2 = x[12] - x[13]; x[12] += x[13];
144 s3 = x[14] - x[15]; x[14] += x[15];
145 s0 = x[ 4] - x[ 5]; x[13] = x[ 5] + x[ 4];
146 s1 = x[ 7] - x[ 6]; x[15] = x[ 7] + x[ 6];
147 x[ 4] = s2; x[ 5] = s1;
148 x[ 6] = s3; x[ 7] = s0;
149 MB();
150
151 mdct_butterfly_8(x);
152 mdct_butterfly_8(x+8);
153 }
154
155 __attribute__((no_sanitize("signed-integer-overflow")))
156 /* 32 point butterfly (in place, 4 register) */
mdct_butterfly_32(DATA_TYPE * x)157 STIN void mdct_butterfly_32(DATA_TYPE *x){
158
159 REG_TYPE s0, s1, s2, s3;
160
161 s0 = x[16] - x[17]; x[16] += x[17];
162 s1 = x[18] - x[19]; x[18] += x[19];
163 s2 = x[ 1] - x[ 0]; x[17] = x[ 1] + x[ 0];
164 s3 = x[ 3] - x[ 2]; x[19] = x[ 3] + x[ 2];
165 XNPROD31( s0, s1, cPI3_8, cPI1_8, &x[ 0], &x[ 2] );
166 XPROD31 ( s2, s3, cPI1_8, cPI3_8, &x[ 1], &x[ 3] );
167 MB();
168
169 s0 = x[20] - x[21]; x[20] += x[21];
170 s1 = x[22] - x[23]; x[22] += x[23];
171 s2 = x[ 5] - x[ 4]; x[21] = x[ 5] + x[ 4];
172 s3 = x[ 7] - x[ 6]; x[23] = x[ 7] + x[ 6];
173 x[ 4] = MULT31((s0 - s1) , cPI2_8);
174 x[ 5] = MULT31((s3 + s2) , cPI2_8);
175 x[ 6] = MULT31((s0 + s1) , cPI2_8);
176 x[ 7] = MULT31((s3 - s2) , cPI2_8);
177 MB();
178
179 s0 = x[24] - x[25]; x[24] += x[25];
180 s1 = x[26] - x[27]; x[26] += x[27];
181 s2 = x[ 9] - x[ 8]; x[25] = x[ 9] + x[ 8];
182 s3 = x[11] - x[10]; x[27] = x[11] + x[10];
183 XNPROD31( s0, s1, cPI1_8, cPI3_8, &x[ 8], &x[10] );
184 XPROD31 ( s2, s3, cPI3_8, cPI1_8, &x[ 9], &x[11] );
185 MB();
186
187 s0 = x[28] - x[29]; x[28] += x[29];
188 s1 = x[30] - x[31]; x[30] += x[31];
189 s2 = x[12] - x[13]; x[29] = x[13] + x[12];
190 s3 = x[15] - x[14]; x[31] = x[15] + x[14];
191 x[12] = s0; x[13] = s3;
192 x[14] = s1; x[15] = s2;
193 MB();
194
195 mdct_butterfly_16(x);
196 mdct_butterfly_16(x+16);
197 }
198
199 /* N/stage point generic N stage butterfly (in place, 2 register) */
mdct_butterfly_generic(DATA_TYPE * x,int points,int step)200 STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){
201 LOOKUP_T *T = sincos_lookup0;
202 DATA_TYPE *x1 = x + points - 4;
203 DATA_TYPE *x2 = x + (points>>1) - 4;
204 REG_TYPE s0, s1, s2, s3;
205
206 do{
207 s0 = x1[0] - x1[1]; x1[0] += x1[1];
208 s1 = x1[3] - x1[2]; x1[2] += x1[3];
209 s2 = x2[1] - x2[0]; x1[1] = x2[1] + x2[0];
210 s3 = x2[3] - x2[2]; x1[3] = x2[3] + x2[2];
211 XPROD31( s1, s0, T[0], T[1], &x2[0], &x2[2] );
212 XPROD31( s2, s3, T[0], T[1], &x2[1], &x2[3] ); T+=step;
213 x1-=4;
214 x2-=4;
215 }while(T<sincos_lookup0+1024);
216 x1 = x + (points>>1) + (points>>2) - 4;
217 x2 = x + (points>>2) - 4;
218 T = sincos_lookup0+1024;
219 do{
220 s0 = x1[0] - x1[1]; x1[0] += x1[1];
221 s1 = x1[2] - x1[3]; x1[2] += x1[3];
222 s2 = x2[0] - x2[1]; x1[1] = x2[1] + x2[0];
223 s3 = x2[3] - x2[2]; x1[3] = x2[3] + x2[2];
224 XNPROD31( s0, s1, T[0], T[1], &x2[0], &x2[2] );
225 XNPROD31( s3, s2, T[0], T[1], &x2[1], &x2[3] ); T-=step;
226 x1-=4;
227 x2-=4;
228 }while(T>sincos_lookup0);
229 }
230
mdct_butterflies(DATA_TYPE * x,int points,int shift)231 STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){
232
233 int stages=7-shift;
234 int i,j;
235
236 for(i=0;--stages>=0;i++){
237 for(j=0;j<(1<<i);j++)
238 {
239 mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift));
240 }
241 }
242
243 for(j=0;j<points;j+=32)
244 mdct_butterfly_32(x+j);
245 }
246
247 static unsigned char bitrev[16]={0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
248
bitrev12(int x)249 STIN int bitrev12(int x){
250 return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);
251 }
252
mdct_bitreverse(DATA_TYPE * x,int n,int shift)253 STIN void mdct_bitreverse(DATA_TYPE *x,int n,int shift){
254 int bit = 0;
255 DATA_TYPE *w = x+(n>>1);
256
257 do{
258 DATA_TYPE b = bitrev12(bit++);
259 DATA_TYPE *xx = x + (b>>shift);
260 REG_TYPE r;
261
262 w -= 2;
263
264 if(w>xx){
265
266 r = xx[0];
267 xx[0] = w[0];
268 w[0] = r;
269
270 r = xx[1];
271 xx[1] = w[1];
272 w[1] = r;
273 }
274 }while(w>x);
275 }
276
277 __attribute__((no_sanitize("signed-integer-overflow")))
mdct_step7(DATA_TYPE * x,int n,int step)278 STIN void mdct_step7(DATA_TYPE *x,int n,int step){
279 DATA_TYPE *w0 = x;
280 DATA_TYPE *w1 = x+(n>>1);
281 LOOKUP_T *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
282 LOOKUP_T *Ttop = T+1024;
283 REG_TYPE s0, s1, s2, s3;
284
285 do{
286 w1 -= 2;
287
288 s0 = w0[0] + w1[0];
289 s1 = w1[1] - w0[1];
290 s2 = MULT32(s0, T[1]) + MULT32(s1, T[0]);
291 s3 = MULT32(s1, T[1]) - MULT32(s0, T[0]);
292 T+=step;
293
294 s0 = (w0[1] + w1[1])>>1;
295 s1 = (w0[0] - w1[0])>>1;
296 w0[0] = s0 + s2;
297 w0[1] = s1 + s3;
298 w1[0] = s0 - s2;
299 w1[1] = s3 - s1;
300
301 w0 += 2;
302 }while(T<Ttop);
303 do{
304 w1 -= 2;
305
306 s0 = w0[0] + w1[0];
307 s1 = w1[1] - w0[1];
308 T-=step;
309 s2 = MULT32(s0, T[0]) + MULT32(s1, T[1]);
310 s3 = MULT32(s1, T[0]) - MULT32(s0, T[1]);
311
312 s0 = (w0[1] + w1[1])>>1;
313 s1 = (w0[0] - w1[0])>>1;
314 w0[0] = s0 + s2;
315 w0[1] = s1 + s3;
316 w1[0] = s0 - s2;
317 w1[1] = s3 - s1;
318
319 w0 += 2;
320 }while(w0<w1);
321 }
322 #endif
323
324 __attribute__((no_sanitize("signed-integer-overflow")))
mdct_step8(DATA_TYPE * x,int n,int step)325 STIN void mdct_step8(DATA_TYPE *x, int n, int step){
326 LOOKUP_T *T;
327 LOOKUP_T *V;
328 DATA_TYPE *iX =x+(n>>1);
329
330 switch(step) {
331 #if defined(ONLY_C)
332 default:
333 T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
334 do{
335 REG_TYPE s0 = x[0];
336 REG_TYPE s1 = -x[1];
337 XPROD31( s0, s1, T[0], T[1], x, x+1); T+=step;
338 x +=2;
339 }while(x<iX);
340 break;
341 #endif
342
343 case 1:
344 {
345 /* linear interpolation between table values: offset=0.5, step=1 */
346 REG_TYPE t0,t1,v0,v1,s0,s1;
347 T = sincos_lookup0;
348 V = sincos_lookup1;
349 t0 = (*T++)>>1;
350 t1 = (*T++)>>1;
351 do{
352 s0 = x[0];
353 s1 = -x[1];
354 t0 += (v0 = (*V++)>>1);
355 t1 += (v1 = (*V++)>>1);
356 XPROD31( s0, s1, t0, t1, x, x+1 );
357
358 s0 = x[2];
359 s1 = -x[3];
360 v0 += (t0 = (*T++)>>1);
361 v1 += (t1 = (*T++)>>1);
362 XPROD31( s0, s1, v0, v1, x+2, x+3 );
363
364 x += 4;
365 }while(x<iX);
366 break;
367 }
368
369 case 0:
370 {
371 /* linear interpolation between table values: offset=0.25, step=0.5 */
372 REG_TYPE t0,t1,v0,v1,q0,q1,s0,s1;
373 T = sincos_lookup0;
374 V = sincos_lookup1;
375 t0 = *T++;
376 t1 = *T++;
377 do{
378
379
380 v0 = *V++;
381 v1 = *V++;
382 t0 += (q0 = (v0-t0)>>2);
383 t1 += (q1 = (v1-t1)>>2);
384 s0 = x[0];
385 s1 = -x[1];
386 XPROD31( s0, s1, t0, t1, x, x+1 );
387 t0 = v0-q0;
388 t1 = v1-q1;
389 s0 = x[2];
390 s1 = -x[3];
391 XPROD31( s0, s1, t0, t1, x+2, x+3 );
392
393 t0 = *T++;
394 t1 = *T++;
395 v0 += (q0 = (t0-v0)>>2);
396 v1 += (q1 = (t1-v1)>>2);
397 s0 = x[4];
398 s1 = -x[5];
399 XPROD31( s0, s1, v0, v1, x+4, x+5 );
400 v0 = t0-q0;
401 v1 = t1-q1;
402 s0 = x[6];
403 s1 = -x[7];
404 XPROD31( s0, s1, v0, v1, x+5, x+6 );
405
406 x+=8;
407 }while(x<iX);
408 break;
409 }
410 }
411 }
412
413 extern int mdct_backwardARM(int n, DATA_TYPE *in);
414
415 /* partial; doesn't perform last-step deinterleave/unrolling. That
416 can be done more efficiently during pcm output */
mdct_backward(int n,DATA_TYPE * in)417 void mdct_backward(int n, DATA_TYPE *in){
418 int step;
419
420 #if defined(ONLY_C)
421 int shift;
422
423 for (shift=4;!(n&(1<<shift));shift++);
424 shift=13-shift;
425 step=2<<shift;
426
427 presymmetry(in,n>>1,step);
428 mdct_butterflies(in,n>>1,shift);
429 mdct_bitreverse(in,n,shift);
430 mdct_step7(in,n,step);
431 mdct_step8(in,n,step>>2);
432 #else
433 step = mdct_backwardARM(n, in);
434 if (step <= 1)
435 mdct_step8(in,n,step);
436 #endif
437 }
438
439 #if defined(ONLY_C)
mdct_shift_right(int n,DATA_TYPE * in,DATA_TYPE * right)440 void mdct_shift_right(int n, DATA_TYPE *in, DATA_TYPE *right){
441 int i;
442 n>>=2;
443 in+=1;
444
445 for(i=0;i<n;i++)
446 right[i]=in[i<<1];
447 }
448 #endif
449
450 extern ogg_int16_t *mdct_unroll_prelap(ogg_int16_t *out,
451 DATA_TYPE *post,
452 DATA_TYPE *l,
453 int step);
454 extern ogg_int16_t *mdct_unroll_part2(ogg_int16_t *out,
455 DATA_TYPE *post,
456 DATA_TYPE *l,
457 DATA_TYPE *r,
458 int step,
459 LOOKUP_T *wL,
460 LOOKUP_T *wR);
461 extern ogg_int16_t *mdct_unroll_part3(ogg_int16_t *out,
462 DATA_TYPE *post,
463 DATA_TYPE *l,
464 DATA_TYPE *r,
465 int step,
466 LOOKUP_T *wL,
467 LOOKUP_T *wR);
468 extern ogg_int16_t *mdct_unroll_postlap(ogg_int16_t *out,
469 DATA_TYPE *post,
470 DATA_TYPE *l,
471 int step);
472
mdct_unroll_lap(int n0,int n1,int lW,int W,DATA_TYPE * in,DATA_TYPE * right,LOOKUP_T * w0,LOOKUP_T * w1,ogg_int16_t * out,int step,int start,int end)473 void mdct_unroll_lap(int n0,int n1,
474 int lW,int W,
475 DATA_TYPE *in,
476 DATA_TYPE *right,
477 LOOKUP_T *w0,
478 LOOKUP_T *w1,
479 ogg_int16_t *out,
480 int step,
481 int start, /* samples, this frame */
482 int end /* samples, this frame */){
483
484 DATA_TYPE *l=in+(W&&lW ? n1>>1 : n0>>1);
485 DATA_TYPE *r=right+(lW ? n1>>2 : n0>>2);
486 DATA_TYPE *post;
487 LOOKUP_T *wR=(W && lW ? w1+(n1>>1) : w0+(n0>>1));
488 LOOKUP_T *wL=(W && lW ? w1 : w0 );
489
490 int preLap=(lW && !W ? (n1>>2)-(n0>>2) : 0 );
491 int halfLap=(lW && W ? (n1>>2) : (n0>>2) );
492 int postLap=(!lW && W ? (n1>>2)-(n0>>2) : 0 );
493 int n,off;
494
495 /* preceeding direct-copy lapping from previous frame, if any */
496 if(preLap){
497 n = (end<preLap?end:preLap);
498 off = (start<preLap?start:preLap);
499 post = r-n;
500 r -= off;
501 start -= off;
502 end -= n;
503 #if defined(ONLY_C)
504 while(r>post){
505 *out = CLIP_TO_15((*--r)>>9);
506 out+=step;
507 }
508 #else
509 out = mdct_unroll_prelap(out,post,r,step);
510 n -= off;
511 if (n < 0)
512 n = 0;
513 r -= n;
514 #endif
515 }
516
517 /* cross-lap; two halves due to wrap-around */
518 n = (end<halfLap?end:halfLap);
519 off = (start<halfLap?start:halfLap);
520 post = r-n;
521 r -= off;
522 l -= off*2;
523 start -= off;
524 wR -= off;
525 wL += off;
526 end -= n;
527 #if defined(ONLY_C)
528 while(r>post){
529 l-=2;
530 *out = CLIP_TO_15((MULT31(*--r,*--wR) + MULT31(*l,*wL++))>>9);
531 out+=step;
532 }
533 #else
534 out = mdct_unroll_part2(out, post, l, r, step, wL, wR);
535 n -= off;
536 if (n < 0)
537 n = 0;
538 l -= 2*n;
539 r -= n;
540 wR -= n;
541 wL += n;
542 #endif
543
544 n = (end<halfLap?end:halfLap);
545 off = (start<halfLap?start:halfLap);
546 post = r+n;
547 r += off;
548 l += off*2;
549 start -= off;
550 end -= n;
551 wR -= off;
552 wL += off;
553 #if defined(ONLY_C)
554 while(r<post){
555 *out = CLIP_TO_15((MULT31(*r++,*--wR) - MULT31(*l,*wL++))>>9);
556 out+=step;
557 l+=2;
558 }
559 #else
560 out = mdct_unroll_part3(out, post, l, r, step, wL, wR);
561 n -= off;
562 if (n < 0)
563 n = 0;
564 l += 2*n;
565 r += n;
566 wR -= n;
567 wL += n;
568 #endif
569
570 /* preceeding direct-copy lapping from previous frame, if any */
571 if(postLap){
572 n = (end<postLap?end:postLap);
573 off = (start<postLap?start:postLap);
574 post = l+n*2;
575 l += off*2;
576 #if defined(ONLY_C)
577 while(l<post){
578 *out = CLIP_TO_15((-*l)>>9);
579 out+=step;
580 l+=2;
581 }
582 #else
583 out = mdct_unroll_postlap(out,post,l,step);
584 #endif
585 }
586 }
587
588