1 /*
2 * Copyright (C) 2016 foo86
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "libavutil/mem.h"
22
23 #include "dcadsp.h"
24 #include "dcamath.h"
25
decode_hf_c(int32_t ** dst,const int32_t * vq_index,const int8_t hf_vq[1024][32],int32_t scale_factors[32][2],ptrdiff_t sb_start,ptrdiff_t sb_end,ptrdiff_t ofs,ptrdiff_t len)26 static void decode_hf_c(int32_t **dst,
27 const int32_t *vq_index,
28 const int8_t hf_vq[1024][32],
29 int32_t scale_factors[32][2],
30 ptrdiff_t sb_start, ptrdiff_t sb_end,
31 ptrdiff_t ofs, ptrdiff_t len)
32 {
33 int i, j;
34
35 for (i = sb_start; i < sb_end; i++) {
36 const int8_t *coeff = hf_vq[vq_index[i]];
37 int32_t scale = scale_factors[i][0];
38 for (j = 0; j < len; j++)
39 dst[i][j + ofs] = clip23(coeff[j] * scale + (1 << 3) >> 4);
40 }
41 }
42
decode_joint_c(int32_t ** dst,int32_t ** src,const int32_t * scale_factors,ptrdiff_t sb_start,ptrdiff_t sb_end,ptrdiff_t ofs,ptrdiff_t len)43 static void decode_joint_c(int32_t **dst, int32_t **src,
44 const int32_t *scale_factors,
45 ptrdiff_t sb_start, ptrdiff_t sb_end,
46 ptrdiff_t ofs, ptrdiff_t len)
47 {
48 int i, j;
49
50 for (i = sb_start; i < sb_end; i++) {
51 int32_t scale = scale_factors[i];
52 for (j = 0; j < len; j++)
53 dst[i][j + ofs] = clip23(mul17(src[i][j + ofs], scale));
54 }
55 }
56
lfe_fir_float_c(float * pcm_samples,int32_t * lfe_samples,const float * filter_coeff,ptrdiff_t npcmblocks,int dec_select)57 static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples,
58 const float *filter_coeff, ptrdiff_t npcmblocks,
59 int dec_select)
60 {
61 // Select decimation factor
62 int factor = 64 << dec_select;
63 int ncoeffs = 8 >> dec_select;
64 int nlfesamples = npcmblocks >> (dec_select + 1);
65 int i, j, k;
66
67 for (i = 0; i < nlfesamples; i++) {
68 // One decimated sample generates 64 or 128 interpolated ones
69 for (j = 0; j < factor / 2; j++) {
70 float a = 0;
71 float b = 0;
72
73 for (k = 0; k < ncoeffs; k++) {
74 a += filter_coeff[ j * ncoeffs + k] * lfe_samples[-k];
75 b += filter_coeff[255 - j * ncoeffs - k] * lfe_samples[-k];
76 }
77
78 pcm_samples[ j] = a;
79 pcm_samples[factor / 2 + j] = b;
80 }
81
82 lfe_samples++;
83 pcm_samples += factor;
84 }
85 }
86
lfe_fir0_float_c(float * pcm_samples,int32_t * lfe_samples,const float * filter_coeff,ptrdiff_t npcmblocks)87 static void lfe_fir0_float_c(float *pcm_samples, int32_t *lfe_samples,
88 const float *filter_coeff, ptrdiff_t npcmblocks)
89 {
90 lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 0);
91 }
92
lfe_fir1_float_c(float * pcm_samples,int32_t * lfe_samples,const float * filter_coeff,ptrdiff_t npcmblocks)93 static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples,
94 const float *filter_coeff, ptrdiff_t npcmblocks)
95 {
96 lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 1);
97 }
98
lfe_x96_float_c(float * dst,const float * src,float * hist,ptrdiff_t len)99 static void lfe_x96_float_c(float *dst, const float *src,
100 float *hist, ptrdiff_t len)
101 {
102 float prev = *hist;
103 int i;
104
105 for (i = 0; i < len; i++) {
106 float a = 0.25f * src[i] + 0.75f * prev;
107 float b = 0.75f * src[i] + 0.25f * prev;
108 prev = src[i];
109 *dst++ = a;
110 *dst++ = b;
111 }
112
113 *hist = prev;
114 }
115
sub_qmf32_float_c(SynthFilterContext * synth,FFTContext * imdct,float * pcm_samples,int32_t ** subband_samples_lo,int32_t ** subband_samples_hi,float * hist1,int * offset,float * hist2,const float * filter_coeff,ptrdiff_t npcmblocks,float scale)116 static void sub_qmf32_float_c(SynthFilterContext *synth,
117 FFTContext *imdct,
118 float *pcm_samples,
119 int32_t **subband_samples_lo,
120 int32_t **subband_samples_hi,
121 float *hist1, int *offset, float *hist2,
122 const float *filter_coeff, ptrdiff_t npcmblocks,
123 float scale)
124 {
125 LOCAL_ALIGNED_32(float, input, [32]);
126 int i, j;
127
128 for (j = 0; j < npcmblocks; j++) {
129 // Load in one sample from each subband
130 for (i = 0; i < 32; i++) {
131 if ((i - 1) & 2)
132 input[i] = -subband_samples_lo[i][j];
133 else
134 input[i] = subband_samples_lo[i][j];
135 }
136
137 // One subband sample generates 32 interpolated ones
138 synth->synth_filter_float(imdct, hist1, offset,
139 hist2, filter_coeff,
140 pcm_samples, input, scale);
141 pcm_samples += 32;
142 }
143 }
144
sub_qmf64_float_c(SynthFilterContext * synth,FFTContext * imdct,float * pcm_samples,int32_t ** subband_samples_lo,int32_t ** subband_samples_hi,float * hist1,int * offset,float * hist2,const float * filter_coeff,ptrdiff_t npcmblocks,float scale)145 static void sub_qmf64_float_c(SynthFilterContext *synth,
146 FFTContext *imdct,
147 float *pcm_samples,
148 int32_t **subband_samples_lo,
149 int32_t **subband_samples_hi,
150 float *hist1, int *offset, float *hist2,
151 const float *filter_coeff, ptrdiff_t npcmblocks,
152 float scale)
153 {
154 LOCAL_ALIGNED_32(float, input, [64]);
155 int i, j;
156
157 if (!subband_samples_hi)
158 memset(&input[32], 0, sizeof(input[0]) * 32);
159
160 for (j = 0; j < npcmblocks; j++) {
161 // Load in one sample from each subband
162 if (subband_samples_hi) {
163 // Full 64 subbands, first 32 are residual coded
164 for (i = 0; i < 32; i++) {
165 if ((i - 1) & 2)
166 input[i] = -subband_samples_lo[i][j] - subband_samples_hi[i][j];
167 else
168 input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j];
169 }
170 for (i = 32; i < 64; i++) {
171 if ((i - 1) & 2)
172 input[i] = -subband_samples_hi[i][j];
173 else
174 input[i] = subband_samples_hi[i][j];
175 }
176 } else {
177 // Only first 32 subbands
178 for (i = 0; i < 32; i++) {
179 if ((i - 1) & 2)
180 input[i] = -subband_samples_lo[i][j];
181 else
182 input[i] = subband_samples_lo[i][j];
183 }
184 }
185
186 // One subband sample generates 64 interpolated ones
187 synth->synth_filter_float_64(imdct, hist1, offset,
188 hist2, filter_coeff,
189 pcm_samples, input, scale);
190 pcm_samples += 64;
191 }
192 }
193
lfe_fir_fixed_c(int32_t * pcm_samples,int32_t * lfe_samples,const int32_t * filter_coeff,ptrdiff_t npcmblocks)194 static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples,
195 const int32_t *filter_coeff, ptrdiff_t npcmblocks)
196 {
197 // Select decimation factor
198 int nlfesamples = npcmblocks >> 1;
199 int i, j, k;
200
201 for (i = 0; i < nlfesamples; i++) {
202 // One decimated sample generates 64 interpolated ones
203 for (j = 0; j < 32; j++) {
204 int64_t a = 0;
205 int64_t b = 0;
206
207 for (k = 0; k < 8; k++) {
208 a += (int64_t)filter_coeff[ j * 8 + k] * lfe_samples[-k];
209 b += (int64_t)filter_coeff[255 - j * 8 - k] * lfe_samples[-k];
210 }
211
212 pcm_samples[ j] = clip23(norm23(a));
213 pcm_samples[32 + j] = clip23(norm23(b));
214 }
215
216 lfe_samples++;
217 pcm_samples += 64;
218 }
219 }
220
lfe_x96_fixed_c(int32_t * dst,const int32_t * src,int32_t * hist,ptrdiff_t len)221 static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src,
222 int32_t *hist, ptrdiff_t len)
223 {
224 int32_t prev = *hist;
225 int i;
226
227 for (i = 0; i < len; i++) {
228 int64_t a = INT64_C(2097471) * src[i] + INT64_C(6291137) * prev;
229 int64_t b = INT64_C(6291137) * src[i] + INT64_C(2097471) * prev;
230 prev = src[i];
231 *dst++ = clip23(norm23(a));
232 *dst++ = clip23(norm23(b));
233 }
234
235 *hist = prev;
236 }
237
sub_qmf32_fixed_c(SynthFilterContext * synth,DCADCTContext * imdct,int32_t * pcm_samples,int32_t ** subband_samples_lo,int32_t ** subband_samples_hi,int32_t * hist1,int * offset,int32_t * hist2,const int32_t * filter_coeff,ptrdiff_t npcmblocks)238 static void sub_qmf32_fixed_c(SynthFilterContext *synth,
239 DCADCTContext *imdct,
240 int32_t *pcm_samples,
241 int32_t **subband_samples_lo,
242 int32_t **subband_samples_hi,
243 int32_t *hist1, int *offset, int32_t *hist2,
244 const int32_t *filter_coeff, ptrdiff_t npcmblocks)
245 {
246 LOCAL_ALIGNED_32(int32_t, input, [32]);
247 int i, j;
248
249 for (j = 0; j < npcmblocks; j++) {
250 // Load in one sample from each subband
251 for (i = 0; i < 32; i++)
252 input[i] = subband_samples_lo[i][j];
253
254 // One subband sample generates 32 interpolated ones
255 synth->synth_filter_fixed(imdct, hist1, offset,
256 hist2, filter_coeff,
257 pcm_samples, input);
258 pcm_samples += 32;
259 }
260 }
261
sub_qmf64_fixed_c(SynthFilterContext * synth,DCADCTContext * imdct,int32_t * pcm_samples,int32_t ** subband_samples_lo,int32_t ** subband_samples_hi,int32_t * hist1,int * offset,int32_t * hist2,const int32_t * filter_coeff,ptrdiff_t npcmblocks)262 static void sub_qmf64_fixed_c(SynthFilterContext *synth,
263 DCADCTContext *imdct,
264 int32_t *pcm_samples,
265 int32_t **subband_samples_lo,
266 int32_t **subband_samples_hi,
267 int32_t *hist1, int *offset, int32_t *hist2,
268 const int32_t *filter_coeff, ptrdiff_t npcmblocks)
269 {
270 LOCAL_ALIGNED_32(int32_t, input, [64]);
271 int i, j;
272
273 if (!subband_samples_hi)
274 memset(&input[32], 0, sizeof(input[0]) * 32);
275
276 for (j = 0; j < npcmblocks; j++) {
277 // Load in one sample from each subband
278 if (subband_samples_hi) {
279 // Full 64 subbands, first 32 are residual coded
280 for (i = 0; i < 32; i++)
281 input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j];
282 for (i = 32; i < 64; i++)
283 input[i] = subband_samples_hi[i][j];
284 } else {
285 // Only first 32 subbands
286 for (i = 0; i < 32; i++)
287 input[i] = subband_samples_lo[i][j];
288 }
289
290 // One subband sample generates 64 interpolated ones
291 synth->synth_filter_fixed_64(imdct, hist1, offset,
292 hist2, filter_coeff,
293 pcm_samples, input);
294 pcm_samples += 64;
295 }
296 }
297
decor_c(int32_t * dst,const int32_t * src,int coeff,ptrdiff_t len)298 static void decor_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
299 {
300 int i;
301
302 for (i = 0; i < len; i++)
303 dst[i] += (SUINT)((int)(src[i] * (SUINT)coeff + (1 << 2)) >> 3);
304 }
305
dmix_sub_xch_c(int32_t * dst1,int32_t * dst2,const int32_t * src,ptrdiff_t len)306 static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2,
307 const int32_t *src, ptrdiff_t len)
308 {
309 int i;
310
311 for (i = 0; i < len; i++) {
312 int32_t cs = mul23(src[i], 5931520 /* M_SQRT1_2 * (1 << 23) */);
313 dst1[i] -= cs;
314 dst2[i] -= cs;
315 }
316 }
317
dmix_sub_c(int32_t * dst,const int32_t * src,int coeff,ptrdiff_t len)318 static void dmix_sub_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
319 {
320 int i;
321
322 for (i = 0; i < len; i++)
323 dst[i] -= (unsigned)mul15(src[i], coeff);
324 }
325
dmix_add_c(int32_t * dst,const int32_t * src,int coeff,ptrdiff_t len)326 static void dmix_add_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
327 {
328 int i;
329
330 for (i = 0; i < len; i++)
331 dst[i] += mul15(src[i], coeff);
332 }
333
dmix_scale_c(int32_t * dst,int scale,ptrdiff_t len)334 static void dmix_scale_c(int32_t *dst, int scale, ptrdiff_t len)
335 {
336 int i;
337
338 for (i = 0; i < len; i++)
339 dst[i] = mul15(dst[i], scale);
340 }
341
dmix_scale_inv_c(int32_t * dst,int scale_inv,ptrdiff_t len)342 static void dmix_scale_inv_c(int32_t *dst, int scale_inv, ptrdiff_t len)
343 {
344 int i;
345
346 for (i = 0; i < len; i++)
347 dst[i] = mul16(dst[i], scale_inv);
348 }
349
filter0(SUINT32 * dst,const int32_t * src,int32_t coeff,ptrdiff_t len)350 static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
351 {
352 int i;
353
354 for (i = 0; i < len; i++)
355 dst[i] -= mul22(src[i], coeff);
356 }
357
filter1(SUINT32 * dst,const int32_t * src,int32_t coeff,ptrdiff_t len)358 static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
359 {
360 int i;
361
362 for (i = 0; i < len; i++)
363 dst[i] -= mul23(src[i], coeff);
364 }
365
assemble_freq_bands_c(int32_t * dst,int32_t * src0,int32_t * src1,const int32_t * coeff,ptrdiff_t len)366 static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1,
367 const int32_t *coeff, ptrdiff_t len)
368 {
369 int i;
370
371 filter0(src0, src1, coeff[0], len);
372 filter0(src1, src0, coeff[1], len);
373 filter0(src0, src1, coeff[2], len);
374 filter0(src1, src0, coeff[3], len);
375
376 for (i = 0; i < 8; i++, src0--) {
377 filter1(src0, src1, coeff[i + 4], len);
378 filter1(src1, src0, coeff[i + 12], len);
379 filter1(src0, src1, coeff[i + 4], len);
380 }
381
382 for (i = 0; i < len; i++) {
383 *dst++ = *src1++;
384 *dst++ = *++src0;
385 }
386 }
387
lbr_bank_c(float output[32][4],float ** input,const float * coeff,ptrdiff_t ofs,ptrdiff_t len)388 static void lbr_bank_c(float output[32][4], float **input,
389 const float *coeff, ptrdiff_t ofs, ptrdiff_t len)
390 {
391 float SW0 = coeff[0];
392 float SW1 = coeff[1];
393 float SW2 = coeff[2];
394 float SW3 = coeff[3];
395
396 float C1 = coeff[4];
397 float C2 = coeff[5];
398 float C3 = coeff[6];
399 float C4 = coeff[7];
400
401 float AL1 = coeff[8];
402 float AL2 = coeff[9];
403
404 int i;
405
406 // Short window and 8 point forward MDCT
407 for (i = 0; i < len; i++) {
408 float *src = input[i] + ofs;
409
410 float a = src[-4] * SW0 - src[-1] * SW3;
411 float b = src[-3] * SW1 - src[-2] * SW2;
412 float c = src[ 2] * SW1 + src[ 1] * SW2;
413 float d = src[ 3] * SW0 + src[ 0] * SW3;
414
415 output[i][0] = C1 * b - C2 * c + C4 * a - C3 * d;
416 output[i][1] = C1 * d - C2 * a - C4 * b - C3 * c;
417 output[i][2] = C3 * b + C2 * d - C4 * c + C1 * a;
418 output[i][3] = C3 * a - C2 * b + C4 * d - C1 * c;
419 }
420
421 // Aliasing cancellation for high frequencies
422 for (i = 12; i < len - 1; i++) {
423 float a = output[i ][3] * AL1;
424 float b = output[i+1][0] * AL1;
425 output[i ][3] += b - a;
426 output[i+1][0] -= b + a;
427 a = output[i ][2] * AL2;
428 b = output[i+1][1] * AL2;
429 output[i ][2] += b - a;
430 output[i+1][1] -= b + a;
431 }
432 }
433
lfe_iir_c(float * output,const float * input,const float iir[5][4],float hist[5][2],ptrdiff_t factor)434 static void lfe_iir_c(float *output, const float *input,
435 const float iir[5][4], float hist[5][2],
436 ptrdiff_t factor)
437 {
438 float res, tmp;
439 int i, j, k;
440
441 for (i = 0; i < 64; i++) {
442 res = *input++;
443
444 for (j = 0; j < factor; j++) {
445 for (k = 0; k < 5; k++) {
446 tmp = hist[k][0] * iir[k][0] + hist[k][1] * iir[k][1] + res;
447 res = hist[k][0] * iir[k][2] + hist[k][1] * iir[k][3] + tmp;
448
449 hist[k][0] = hist[k][1];
450 hist[k][1] = tmp;
451 }
452
453 *output++ = res;
454 res = 0;
455 }
456 }
457 }
458
ff_dcadsp_init(DCADSPContext * s)459 av_cold void ff_dcadsp_init(DCADSPContext *s)
460 {
461 s->decode_hf = decode_hf_c;
462 s->decode_joint = decode_joint_c;
463
464 s->lfe_fir_float[0] = lfe_fir0_float_c;
465 s->lfe_fir_float[1] = lfe_fir1_float_c;
466 s->lfe_x96_float = lfe_x96_float_c;
467 s->sub_qmf_float[0] = sub_qmf32_float_c;
468 s->sub_qmf_float[1] = sub_qmf64_float_c;
469
470 s->lfe_fir_fixed = lfe_fir_fixed_c;
471 s->lfe_x96_fixed = lfe_x96_fixed_c;
472 s->sub_qmf_fixed[0] = sub_qmf32_fixed_c;
473 s->sub_qmf_fixed[1] = sub_qmf64_fixed_c;
474
475 s->decor = decor_c;
476
477 s->dmix_sub_xch = dmix_sub_xch_c;
478 s->dmix_sub = dmix_sub_c;
479 s->dmix_add = dmix_add_c;
480 s->dmix_scale = dmix_scale_c;
481 s->dmix_scale_inv = dmix_scale_inv_c;
482
483 s->assemble_freq_bands = assemble_freq_bands_c;
484
485 s->lbr_bank = lbr_bank_c;
486 s->lfe_iir = lfe_iir_c;
487
488 if (ARCH_X86)
489 ff_dcadsp_init_x86(s);
490 }
491