1 /*
2 * Copyright (C) 2016 foo86
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/mem.h"
22 #include "libavutil/mem_internal.h"
23
24 #include "dcadsp.h"
25 #include "dcamath.h"
26
/**
 * Fill a range of subbands from scaled code book rows.
 *
 * For every subband in [sb_start, sb_end) the row of hf_vq selected by
 * vq_index[band] is multiplied element-wise by scale_factors[band][0],
 * rounded (add 8, shift right by 4), passed through clip23() and stored in
 * dst[band][ofs .. ofs + len - 1].
 *
 * @param dst           per-subband output buffers
 * @param vq_index      per-subband row index into hf_vq
 * @param hf_vq         code book with 32 entries per row
 * @param scale_factors per-subband scale factors; only element [0] is read
 * @param sb_start      first subband to process
 * @param sb_end        one past the last subband to process
 * @param ofs           index of the first sample written in each buffer
 * @param len           number of samples written per subband
 */
static void decode_hf_c(int32_t **dst,
                        const int32_t *vq_index,
                        const int8_t hf_vq[1024][32],
                        int32_t scale_factors[32][2],
                        ptrdiff_t sb_start, ptrdiff_t sb_end,
                        ptrdiff_t ofs, ptrdiff_t len)
{
    int band, n;

    for (band = sb_start; band < sb_end; band++) {
        const int8_t *codebook = hf_vq[vq_index[band]];
        const int32_t gain     = scale_factors[band][0];

        for (n = 0; n < len; n++) {
            // '+' binds tighter than '>>': round first, then scale down
            dst[band][n + ofs] = clip23((codebook[n] * gain + (1 << 3)) >> 4);
        }
    }
}
43
/**
 * Derive subband samples in dst from the matching samples in src.
 *
 * For every subband in [sb_start, sb_end) and every sample index in
 * [ofs, ofs + len) the source sample is combined with scale_factors[band]
 * by mul17() and the result is passed through clip23().
 *
 * @param dst           per-subband output buffers
 * @param src           per-subband input buffers
 * @param scale_factors one scale factor per subband
 * @param sb_start      first subband to process
 * @param sb_end        one past the last subband to process
 * @param ofs           index of the first sample processed in each buffer
 * @param len           number of samples processed per subband
 */
static void decode_joint_c(int32_t **dst, int32_t **src,
                           const int32_t *scale_factors,
                           ptrdiff_t sb_start, ptrdiff_t sb_end,
                           ptrdiff_t ofs, ptrdiff_t len)
{
    int band, n;

    for (band = sb_start; band < sb_end; band++) {
        const int32_t gain = scale_factors[band];

        for (n = 0; n < len; n++) {
            const int32_t scaled = mul17(src[band][n + ofs], gain);
            dst[band][n + ofs] = clip23(scaled);
        }
    }
}
57
/**
 * Interpolate decimated samples with a 256 entry FIR table (float output).
 *
 * dec_select chooses the layout:
 *   - 0: 8 taps per phase, 64 output samples per input sample
 *   - 1: 4 taps per phase, 128 output samples per input sample
 *
 * Each input sample produces two half-blocks of output: the first half reads
 * the coefficient table forwards, the second half reads it backwards starting
 * from index 255.
 *
 * @param pcm_samples  output buffer
 * @param lfe_samples  points at the current input sample; the preceding
 *                     (ncoeffs - 1) samples are read as history
 * @param filter_coeff coefficient table, indexed from 0 to 255
 * @param npcmblocks   block count; the number of input samples consumed is
 *                     npcmblocks >> (dec_select + 1)
 * @param dec_select   0 or 1, see above
 */
static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples,
                            const float *filter_coeff, ptrdiff_t npcmblocks,
                            int dec_select)
{
    const int out_per_in = 64 << dec_select;
    const int ntaps      = 8 >> dec_select;
    const int nin        = npcmblocks >> (dec_select + 1);
    const int half       = out_per_in / 2;
    int n, phase, tap;

    for (n = 0; n < nin; n++) {
        for (phase = 0; phase < half; phase++) {
            // Forward and mirrored views of the coefficient table
            const float *fwd = filter_coeff + phase * ntaps;
            const float *rev = filter_coeff + 255 - phase * ntaps;
            float acc_fwd = 0;
            float acc_rev = 0;

            for (tap = 0; tap < ntaps; tap++) {
                acc_fwd += fwd[ tap] * lfe_samples[-tap];
                acc_rev += rev[-tap] * lfe_samples[-tap];
            }

            pcm_samples[       phase] = acc_fwd;
            pcm_samples[half + phase] = acc_rev;
        }

        pcm_samples += out_per_in;
        lfe_samples++;
    }
}
87
/**
 * Float FIR interpolation with dec_select fixed to 0.
 *
 * Forwards all arguments to lfe_fir_float_c().
 */
static void lfe_fir0_float_c(float *pcm_samples, int32_t *lfe_samples,
                             const float *filter_coeff, ptrdiff_t npcmblocks)
{
    const int dec_select = 0;

    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks,
                    dec_select);
}
93
/**
 * Float FIR interpolation with dec_select fixed to 1.
 *
 * Forwards all arguments to lfe_fir_float_c().
 */
static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples,
                             const float *filter_coeff, ptrdiff_t npcmblocks)
{
    const int dec_select = 1;

    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks,
                    dec_select);
}
99
/**
 * Double the sample count by linear interpolation (float).
 *
 * Every input sample yields two outputs placed between the previous and the
 * current input: 1/4 and 3/4 of the way from the previous sample to the
 * current one.
 *
 * @param dst  output buffer, receives 2 * len samples
 * @param src  input samples
 * @param hist in: sample preceding src[0]; out: last input sample consumed
 * @param len  number of input samples
 */
static void lfe_x96_float_c(float *dst, const float *src,
                            float *hist, ptrdiff_t len)
{
    float last = *hist;
    int n;

    for (n = 0; n < len; n++) {
        const float near_prev = 0.25f * src[n] + 0.75f * last;
        const float near_cur  = 0.75f * src[n] + 0.25f * last;

        last   = src[n];
        dst[0] = near_prev;
        dst[1] = near_cur;
        dst   += 2;
    }

    *hist = last;
}
116
/**
 * Run the 32-band float synthesis filter over npcmblocks blocks.
 *
 * For each block one sample is gathered from each of the 32 subbands in
 * subband_samples_lo, sign-flipped for the subbands where (band - 1) & 2 is
 * non-zero, and handed to synth->synth_filter_float(), which writes 32
 * output samples.
 *
 * @param synth              provides the synth_filter_float callback
 * @param imdct              passed through to the callback
 * @param pcm_samples        output buffer, 32 samples per block
 * @param subband_samples_lo 32 per-subband input buffers
 * @param subband_samples_hi not used by this function
 * @param hist1              passed through to the callback
 * @param offset             passed through to the callback
 * @param hist2              passed through to the callback
 * @param filter_coeff       passed through to the callback
 * @param npcmblocks         number of blocks to process
 * @param scale              passed through to the callback
 */
static void sub_qmf32_float_c(SynthFilterContext *synth,
                              FFTContext *imdct,
                              float *pcm_samples,
                              int32_t **subband_samples_lo,
                              int32_t **subband_samples_hi,
                              float *hist1, int *offset, float *hist2,
                              const float *filter_coeff, ptrdiff_t npcmblocks,
                              float scale)
{
    LOCAL_ALIGNED_32(float, input, [32]);
    int band, blk;

    for (blk = 0; blk < npcmblocks; blk++) {
        // Gather one sample per subband, negating where (band - 1) & 2 is set
        for (band = 0; band < 32; band++) {
            const int32_t sample = subband_samples_lo[band][blk];

            input[band] = ((band - 1) & 2) ? -sample : sample;
        }

        // Each gathered vector expands to 32 output samples
        synth->synth_filter_float(imdct, hist1, offset,
                                  hist2, filter_coeff,
                                  pcm_samples, input, scale);
        pcm_samples += 32;
    }
}
145
/**
 * Run the 64-band float synthesis filter over npcmblocks blocks.
 *
 * When subband_samples_hi is non-NULL, subbands 0..31 are the sum of the
 * lo and hi buffers and subbands 32..63 come from the hi buffers alone.
 * When it is NULL, only subbands 0..31 are loaded from the lo buffers and
 * the upper 32 inputs stay zero. In all cases the value is negated for
 * subbands where (band - 1) & 2 is non-zero.
 *
 * @param synth              provides the synth_filter_float_64 callback
 * @param imdct              passed through to the callback
 * @param pcm_samples        output buffer, 64 samples per block
 * @param subband_samples_lo 32 per-subband input buffers
 * @param subband_samples_hi 64 per-subband input buffers, or NULL
 * @param hist1              passed through to the callback
 * @param offset             passed through to the callback
 * @param hist2              passed through to the callback
 * @param filter_coeff       passed through to the callback
 * @param npcmblocks         number of blocks to process
 * @param scale              passed through to the callback
 */
static void sub_qmf64_float_c(SynthFilterContext *synth,
                              FFTContext *imdct,
                              float *pcm_samples,
                              int32_t **subband_samples_lo,
                              int32_t **subband_samples_hi,
                              float *hist1, int *offset, float *hist2,
                              const float *filter_coeff, ptrdiff_t npcmblocks,
                              float scale)
{
    LOCAL_ALIGNED_32(float, input, [64]);
    int band, blk;

    // Without high subbands the upper half of the input is constant zero
    if (!subband_samples_hi)
        memset(&input[32], 0, 32 * sizeof(input[0]));

    for (blk = 0; blk < npcmblocks; blk++) {
        if (subband_samples_hi) {
            // All 64 subbands present; the lower 32 are lo + hi
            for (band = 0; band < 32; band++) {
                const int32_t lo = subband_samples_lo[band][blk];
                const int32_t hi = subband_samples_hi[band][blk];

                input[band] = ((band - 1) & 2) ? -lo - hi : lo + hi;
            }
            for (band = 32; band < 64; band++) {
                const int32_t hi = subband_samples_hi[band][blk];

                input[band] = ((band - 1) & 2) ? -hi : hi;
            }
        } else {
            // Lower 32 subbands only
            for (band = 0; band < 32; band++) {
                const int32_t lo = subband_samples_lo[band][blk];

                input[band] = ((band - 1) & 2) ? -lo : lo;
            }
        }

        // Each gathered vector expands to 64 output samples
        synth->synth_filter_float_64(imdct, hist1, offset,
                                     hist2, filter_coeff,
                                     pcm_samples, input, scale);
        pcm_samples += 64;
    }
}
194
/**
 * Interpolate decimated samples with a 256 entry FIR table (fixed point).
 *
 * Each input sample yields 64 outputs: 32 computed with the coefficient
 * table read forwards and 32 with it read backwards from index 255, using
 * 8 taps each. Sums are accumulated in 64 bits, then reduced with norm23()
 * and clip23().
 *
 * @param pcm_samples  output buffer, 64 samples per input sample
 * @param lfe_samples  points at the current input sample; the 7 preceding
 *                     samples are read as history
 * @param filter_coeff coefficient table, indexed from 0 to 255
 * @param npcmblocks   block count; npcmblocks >> 1 input samples are consumed
 */
static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples,
                            const int32_t *filter_coeff, ptrdiff_t npcmblocks)
{
    const int nin = npcmblocks >> 1;
    int n, phase, tap;

    for (n = 0; n < nin; n++) {
        for (phase = 0; phase < 32; phase++) {
            // Forward and mirrored views of the coefficient table
            const int32_t *fwd = filter_coeff + phase * 8;
            const int32_t *rev = filter_coeff + 255 - phase * 8;
            int64_t acc_fwd = 0;
            int64_t acc_rev = 0;

            for (tap = 0; tap < 8; tap++) {
                acc_fwd += (int64_t)fwd[ tap] * lfe_samples[-tap];
                acc_rev += (int64_t)rev[-tap] * lfe_samples[-tap];
            }

            pcm_samples[     phase] = clip23(norm23(acc_fwd));
            pcm_samples[32 + phase] = clip23(norm23(acc_rev));
        }

        pcm_samples += 64;
        lfe_samples++;
    }
}
221
/**
 * Double the sample count by weighted interpolation (fixed point).
 *
 * Every input sample yields two outputs, each a weighted sum of the current
 * and the previous input using the integer weights 2097471 and 6291137; the
 * 64-bit sums are reduced with norm23() and clip23().
 *
 * @param dst  output buffer, receives 2 * len samples
 * @param src  input samples
 * @param hist in: sample preceding src[0]; out: last input sample consumed
 * @param len  number of input samples
 */
static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src,
                            int32_t *hist, ptrdiff_t len)
{
    int32_t last = *hist;
    int n;

    for (n = 0; n < len; n++) {
        const int64_t near_prev = INT64_C(2097471) * src[n] + INT64_C(6291137) * last;
        const int64_t near_cur  = INT64_C(6291137) * src[n] + INT64_C(2097471) * last;

        last   = src[n];
        dst[0] = clip23(norm23(near_prev));
        dst[1] = clip23(norm23(near_cur));
        dst   += 2;
    }

    *hist = last;
}
238
/**
 * Run the 32-band fixed point synthesis filter over npcmblocks blocks.
 *
 * For each block one sample is gathered from each of the 32 subbands in
 * subband_samples_lo and handed to synth->synth_filter_fixed(), which
 * writes 32 output samples.
 *
 * @param synth              provides the synth_filter_fixed callback
 * @param imdct              passed through to the callback
 * @param pcm_samples        output buffer, 32 samples per block
 * @param subband_samples_lo 32 per-subband input buffers
 * @param subband_samples_hi not used by this function
 * @param hist1              passed through to the callback
 * @param offset             passed through to the callback
 * @param hist2              passed through to the callback
 * @param filter_coeff       passed through to the callback
 * @param npcmblocks         number of blocks to process
 */
static void sub_qmf32_fixed_c(SynthFilterContext *synth,
                              DCADCTContext *imdct,
                              int32_t *pcm_samples,
                              int32_t **subband_samples_lo,
                              int32_t **subband_samples_hi,
                              int32_t *hist1, int *offset, int32_t *hist2,
                              const int32_t *filter_coeff, ptrdiff_t npcmblocks)
{
    LOCAL_ALIGNED_32(int32_t, input, [32]);
    int band, blk;

    for (blk = 0; blk < npcmblocks; blk++) {
        // Gather one sample per subband
        for (band = 0; band < 32; band++) {
            input[band] = subband_samples_lo[band][blk];
        }

        // Each gathered vector expands to 32 output samples
        synth->synth_filter_fixed(imdct, hist1, offset,
                                  hist2, filter_coeff,
                                  pcm_samples, input);
        pcm_samples += 32;
    }
}
262
/**
 * Run the 64-band fixed point synthesis filter over npcmblocks blocks.
 *
 * When subband_samples_hi is non-NULL, subbands 0..31 are the sum of the
 * lo and hi buffers and subbands 32..63 come from the hi buffers alone.
 * When it is NULL, only subbands 0..31 are loaded from the lo buffers and
 * the upper 32 inputs stay zero.
 *
 * @param synth              provides the synth_filter_fixed_64 callback
 * @param imdct              passed through to the callback
 * @param pcm_samples        output buffer, 64 samples per block
 * @param subband_samples_lo 32 per-subband input buffers
 * @param subband_samples_hi 64 per-subband input buffers, or NULL
 * @param hist1              passed through to the callback
 * @param offset             passed through to the callback
 * @param hist2              passed through to the callback
 * @param filter_coeff       passed through to the callback
 * @param npcmblocks         number of blocks to process
 */
static void sub_qmf64_fixed_c(SynthFilterContext *synth,
                              DCADCTContext *imdct,
                              int32_t *pcm_samples,
                              int32_t **subband_samples_lo,
                              int32_t **subband_samples_hi,
                              int32_t *hist1, int *offset, int32_t *hist2,
                              const int32_t *filter_coeff, ptrdiff_t npcmblocks)
{
    LOCAL_ALIGNED_32(int32_t, input, [64]);
    int band, blk;

    // Without high subbands the upper half of the input is constant zero
    if (!subband_samples_hi)
        memset(&input[32], 0, 32 * sizeof(input[0]));

    for (blk = 0; blk < npcmblocks; blk++) {
        if (subband_samples_hi) {
            // All 64 subbands present; the lower 32 are lo + hi
            for (band = 0; band < 32; band++) {
                const int32_t lo = subband_samples_lo[band][blk];
                const int32_t hi = subband_samples_hi[band][blk];

                input[band] = lo + hi;
            }
            for (band = 32; band < 64; band++) {
                input[band] = subband_samples_hi[band][blk];
            }
        } else {
            // Lower 32 subbands only
            for (band = 0; band < 32; band++) {
                input[band] = subband_samples_lo[band][blk];
            }
        }

        // Each gathered vector expands to 64 output samples
        synth->synth_filter_fixed_64(imdct, hist1, offset,
                                     hist2, filter_coeff,
                                     pcm_samples, input);
        pcm_samples += 64;
    }
}
298
decor_c(int32_t * dst,const int32_t * src,int coeff,ptrdiff_t len)299 static void decor_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
300 {
301 int i;
302
303 for (i = 0; i < len; i++)
304 dst[i] += (SUINT)((int)(src[i] * (SUINT)coeff + (1 << 2)) >> 3);
305 }
306
/**
 * Subtract the same scaled source sample from two destination buffers.
 *
 * Per sample, src[i] is combined with the constant 5931520 by mul23() and
 * the result is subtracted from both dst1[i] and dst2[i].
 *
 * @param dst1 first buffer updated in place
 * @param dst2 second buffer updated in place
 * @param src  samples to scale and subtract
 * @param len  number of samples
 */
static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2,
                           const int32_t *src, ptrdiff_t len)
{
    int n;

    for (n = 0; n < len; n++) {
        const int32_t scaled = mul23(src[n], 5931520 /* ~ M_SQRT1_2 * (1 << 23) */);

        dst1[n] -= scaled;
        dst2[n] -= scaled;
    }
}
318
/**
 * Subtract mul15(src[i], coeff) from dst[i] for each of len samples.
 *
 * The subtraction is performed on the unsigned conversion of the product.
 *
 * @param dst   samples updated in place
 * @param src   samples to scale and subtract
 * @param coeff multiplier passed to mul15()
 * @param len   number of samples
 */
static void dmix_sub_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
{
    int n;

    for (n = 0; n < len; n++) {
        const unsigned scaled = (unsigned)mul15(src[n], coeff);

        dst[n] -= scaled;
    }
}
326
/**
 * Add mul15(src[i], coeff) to dst[i] for each of len samples.
 *
 * The addition is performed on the unsigned conversion of the product.
 *
 * @param dst   samples updated in place
 * @param src   samples to scale and add
 * @param coeff multiplier passed to mul15()
 * @param len   number of samples
 */
static void dmix_add_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
{
    int n;

    for (n = 0; n < len; n++) {
        const unsigned scaled = (unsigned)mul15(src[n], coeff);

        dst[n] += scaled;
    }
}
334
/**
 * Replace each of len samples in dst with mul15(sample, scale).
 *
 * @param dst   samples updated in place
 * @param scale multiplier passed to mul15()
 * @param len   number of samples
 */
static void dmix_scale_c(int32_t *dst, int scale, ptrdiff_t len)
{
    int n;

    for (n = 0; n < len; n++) {
        const int32_t sample = dst[n];

        dst[n] = mul15(sample, scale);
    }
}
342
/**
 * Replace each of len samples in dst with mul16(sample, scale_inv).
 *
 * @param dst       samples updated in place
 * @param scale_inv multiplier passed to mul16()
 * @param len       number of samples
 */
static void dmix_scale_inv_c(int32_t *dst, int scale_inv, ptrdiff_t len)
{
    int n;

    for (n = 0; n < len; n++) {
        const int32_t sample = dst[n];

        dst[n] = mul16(sample, scale_inv);
    }
}
350
/**
 * Subtract mul22(src[i], coeff) from dst[i] for each of len samples.
 *
 * @param dst   samples updated in place
 * @param src   samples fed to mul22()
 * @param coeff multiplier passed to mul22()
 * @param len   number of samples
 */
static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
{
    int n;

    for (n = 0; n < len; n++) {
        dst[n] = dst[n] - mul22(src[n], coeff);
    }
}
358
/**
 * Subtract mul23(src[i], coeff) from dst[i] for each of len samples.
 *
 * @param dst   samples updated in place
 * @param src   samples fed to mul23()
 * @param coeff multiplier passed to mul23()
 * @param len   number of samples
 */
static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
{
    int n;

    for (n = 0; n < len; n++) {
        dst[n] = dst[n] - mul23(src[n], coeff);
    }
}
366
/**
 * Combine two bands into one interleaved output of 2 * len samples.
 *
 * Both source buffers are modified in place: first by four alternating
 * filter0() passes using coeff[0..3], then by eight rounds of filter1()
 * passes in which the src0 window starts one sample earlier each round
 * (coeff[4..11] for the src0 updates, coeff[12..19] for the src1 updates).
 * Finally src1[i] and src0[i - 7] are interleaved into dst.
 *
 * @param dst   output buffer, receives 2 * len samples
 * @param src0  first band; samples from src0[-7] onwards are read and written
 * @param src1  second band, len samples, modified in place
 * @param coeff 20 filter coefficients
 * @param len   number of samples per band
 */
static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1,
                                  const int32_t *coeff, ptrdiff_t len)
{
    int round, n;

    // Two identical pairs of filter0 passes, alternating the updated band
    for (round = 0; round < 4; round += 2) {
        filter0(src0, src1, coeff[round    ], len);
        filter0(src1, src0, coeff[round + 1], len);
    }

    // Each round shifts the src0 window one sample further back
    for (round = 0; round < 8; round++) {
        int32_t *shifted = src0 - round;

        filter1(shifted, src1, coeff[round +  4], len);
        filter1(src1, shifted, coeff[round + 12], len);
        filter1(shifted, src1, coeff[round +  4], len);
    }

    // Interleave: src1 first, then src0 taken from the last shifted window
    for (n = 0; n < len; n++) {
        *dst++ = src1[n];
        *dst++ = src0[n - 7];
    }
}
388
/**
 * Transform 8 input samples per band into 4 output values per band, then
 * cross-mix neighbouring bands from band 12 upwards.
 *
 * @param output receives 4 values for each of the first len bands
 * @param input  per-band sample buffers; samples at ofs - 4 .. ofs + 3 are read
 * @param coeff  10 coefficients: 4 window weights, 4 transform weights and
 *               2 weights for the cross-band step
 * @param ofs    centre offset into every input buffer
 * @param len    number of bands to process
 */
static void lbr_bank_c(float output[32][4], float **input,
                       const float *coeff, ptrdiff_t ofs, ptrdiff_t len)
{
    // Window weights
    const float sw0 = coeff[0];
    const float sw1 = coeff[1];
    const float sw2 = coeff[2];
    const float sw3 = coeff[3];

    // Transform weights
    const float c1 = coeff[4];
    const float c2 = coeff[5];
    const float c3 = coeff[6];
    const float c4 = coeff[7];

    // Cross-band weights
    const float al1 = coeff[8];
    const float al2 = coeff[9];

    int band;

    // Short window and 8 point forward MDCT
    for (band = 0; band < len; band++) {
        const float *in  = input[band] + ofs;
        float       *out = output[band];

        const float a = in[-4] * sw0 - in[-1] * sw3;
        const float b = in[-3] * sw1 - in[-2] * sw2;
        const float c = in[ 2] * sw1 + in[ 1] * sw2;
        const float d = in[ 3] * sw0 + in[ 0] * sw3;

        out[0] = c1 * b - c2 * c + c4 * a - c3 * d;
        out[1] = c1 * d - c2 * a - c4 * b - c3 * c;
        out[2] = c3 * b + c2 * d - c4 * c + c1 * a;
        out[3] = c3 * a - c2 * b + c4 * d - c1 * c;
    }

    // Aliasing cancellation for high frequencies
    for (band = 12; band < len - 1; band++) {
        float *cur  = output[band];
        float *next = output[band + 1];
        float a, b;

        a = cur[3]  * al1;
        b = next[0] * al1;
        cur[3]  += b - a;
        next[0] -= b + a;

        a = cur[2]  * al2;
        b = next[1] * al2;
        cur[2]  += b - a;
        next[1] -= b + a;
    }
}
434
/**
 * Upsample 64 input samples by an integer factor through a cascade of five
 * second-order recursive sections.
 *
 * Each input sample is followed by (factor - 1) zero samples; every sample
 * of that sequence is run through all five sections and written to output.
 *
 * @param output receives 64 * factor samples
 * @param input  64 input samples
 * @param iir    four coefficients per section: [0] and [1] weight the two
 *               history values in the recursive part, [2] and [3] weight
 *               them in the section output
 * @param hist   two history values per section, updated in place
 * @param factor number of output samples per input sample
 */
static void lfe_iir_c(float *output, const float *input,
                      const float iir[5][4], float hist[5][2],
                      ptrdiff_t factor)
{
    int n, phase, stage;

    for (n = 0; n < 64; n++) {
        float acc = input[n];

        for (phase = 0; phase < factor; phase++) {
            for (stage = 0; stage < 5; stage++) {
                const float *c = iir[stage];
                float       *h = hist[stage];
                const float  w = h[0] * c[0] + h[1] * c[1] + acc;

                acc  = h[0] * c[2] + h[1] * c[3] + w;
                h[0] = h[1];
                h[1] = w;
            }

            *output++ = acc;
            // Remaining phases of this input sample are fed with zero
            acc = 0;
        }
    }
}
459
ff_dcadsp_init(DCADSPContext * s)460 av_cold void ff_dcadsp_init(DCADSPContext *s)
461 {
462 s->decode_hf = decode_hf_c;
463 s->decode_joint = decode_joint_c;
464
465 s->lfe_fir_float[0] = lfe_fir0_float_c;
466 s->lfe_fir_float[1] = lfe_fir1_float_c;
467 s->lfe_x96_float = lfe_x96_float_c;
468 s->sub_qmf_float[0] = sub_qmf32_float_c;
469 s->sub_qmf_float[1] = sub_qmf64_float_c;
470
471 s->lfe_fir_fixed = lfe_fir_fixed_c;
472 s->lfe_x96_fixed = lfe_x96_fixed_c;
473 s->sub_qmf_fixed[0] = sub_qmf32_fixed_c;
474 s->sub_qmf_fixed[1] = sub_qmf64_fixed_c;
475
476 s->decor = decor_c;
477
478 s->dmix_sub_xch = dmix_sub_xch_c;
479 s->dmix_sub = dmix_sub_c;
480 s->dmix_add = dmix_add_c;
481 s->dmix_scale = dmix_scale_c;
482 s->dmix_scale_inv = dmix_scale_inv_c;
483
484 s->assemble_freq_bands = assemble_freq_bands_c;
485
486 s->lbr_bank = lbr_bank_c;
487 s->lfe_iir = lfe_iir_c;
488
489 if (ARCH_X86)
490 ff_dcadsp_init_x86(s);
491 }
492