/*
 * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <stdint.h>

#include "config.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavresample/audio_mix.h"

/*
 * Mixing routines for the 2-to-1 and 1-to-2 channel cases.
 *
 * Only the prototypes live in this file; the implementations are provided
 * elsewhere (presumably the x86 assembly sources -- confirm in the build).
 * Each name spells out the channel counts, the sample format (fltp/s16p),
 * the coefficient type (flt/q8) and the instruction set, matching the
 * arguments with which ff_audio_mix_init_x86() registers it.
 */

/* 2 -> 1, planar float samples, float coefficients */
void ff_mix_2_to_1_fltp_flt_sse(float **src, float **matrix, int len,
                                int out_ch, int in_ch);
void ff_mix_2_to_1_fltp_flt_avx(float **src, float **matrix, int len,
                                int out_ch, int in_ch);

/* 2 -> 1, planar 16-bit samples, float coefficients */
void ff_mix_2_to_1_s16p_flt_sse2(int16_t **src, float **matrix, int len,
                                 int out_ch, int in_ch);
void ff_mix_2_to_1_s16p_flt_sse4(int16_t **src, float **matrix, int len,
                                 int out_ch, int in_ch);

/* 2 -> 1, planar 16-bit samples, Q8 coefficients (int16_t matrix) */
void ff_mix_2_to_1_s16p_q8_sse2(int16_t **src, int16_t **matrix,
                                int len, int out_ch, int in_ch);

/* 1 -> 2, planar float samples, float coefficients */
void ff_mix_1_to_2_fltp_flt_sse(float **src, float **matrix, int len,
                                int out_ch, int in_ch);
void ff_mix_1_to_2_fltp_flt_avx(float **src, float **matrix, int len,
                                int out_ch, int in_ch);

/* 1 -> 2, planar 16-bit samples, float coefficients */
void ff_mix_1_to_2_s16p_flt_sse2(int16_t **src, float **matrix, int len,
                                 int out_ch, int in_ch);
void ff_mix_1_to_2_s16p_flt_sse4(int16_t **src, float **matrix, int len,
                                 int out_ch, int in_ch);
void ff_mix_1_to_2_s16p_flt_avx(int16_t **src, float **matrix, int len,
                                int out_ch, int in_ch);

/*
 * Declare every mixing routine variant that takes `chan` input channels.
 *
 * For the given channel count this emits prototypes for the *_to_1 and
 * *_to_2 routines in each of these combinations:
 *   - fltp samples / flt coefficients: sse, avx, fma4
 *   - s16p samples / flt coefficients: sse2, sse4, avx, fma4
 *
 * `chan` is token-pasted into the function names, so it must be a plain
 * integer literal. The expansion ends with its own ';', which is why the
 * instantiations below carry no trailing semicolon.
 */
#define DEFINE_MIX_3_8_TO_1_2(chan)                                     \
void ff_mix_ ## chan ## _to_1_fltp_flt_sse(float **src,                 \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
void ff_mix_ ## chan ## _to_2_fltp_flt_sse(float **src,                 \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
                                                                        \
void ff_mix_ ## chan ## _to_1_s16p_flt_sse2(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
void ff_mix_ ## chan ## _to_2_s16p_flt_sse2(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
                                                                        \
void ff_mix_ ## chan ## _to_1_s16p_flt_sse4(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
void ff_mix_ ## chan ## _to_2_s16p_flt_sse4(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
                                                                        \
void ff_mix_ ## chan ## _to_1_fltp_flt_avx(float **src,                 \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
void ff_mix_ ## chan ## _to_2_fltp_flt_avx(float **src,                 \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
                                                                        \
void ff_mix_ ## chan ## _to_1_s16p_flt_avx(int16_t **src,               \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
void ff_mix_ ## chan ## _to_2_s16p_flt_avx(int16_t **src,               \
                                           float **matrix, int len,     \
                                           int out_ch, int in_ch);      \
                                                                        \
void ff_mix_ ## chan ## _to_1_fltp_flt_fma4(float **src,                \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
void ff_mix_ ## chan ## _to_2_fltp_flt_fma4(float **src,                \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
                                                                        \
void ff_mix_ ## chan ## _to_1_s16p_flt_fma4(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);     \
void ff_mix_ ## chan ## _to_2_s16p_flt_fma4(int16_t **src,              \
                                            float **matrix, int len,    \
                                            int out_ch, int in_ch);

/* Prototypes for 3 through 8 input channels. */
DEFINE_MIX_3_8_TO_1_2(3)
DEFINE_MIX_3_8_TO_1_2(4)
DEFINE_MIX_3_8_TO_1_2(5)
DEFINE_MIX_3_8_TO_1_2(6)
DEFINE_MIX_3_8_TO_1_2(7)
DEFINE_MIX_3_8_TO_1_2(8)

/*
 * Register all mixing routines for `chan` input channels (to 1 and to 2
 * output channels) that the CPU flags allow.
 *
 * Requirements at the expansion site:
 *   - `am` (the AudioMix being set up) and `cpu_flags` must be in scope;
 *   - `chan` must be an integer literal, since it is token-pasted into the
 *     routine names declared by DEFINE_MIX_3_8_TO_1_2().
 *
 * The two integers following the channel counts in each call are the
 * pointer alignment and the sample-count alignment (see the ptr_align /
 * smp_align locals). The AVX and FMA4 planar-float routines are registered
 * with 32/8 alignment, dropping to 16/4 on 32-bit x86 or for 6 or more
 * input channels; the reason is not visible here -- check the assembly.
 *
 * NOTE: this expands to a sequence of independent `if` statements, not to
 * a single statement, so never use it as the body of an unbraced if/else.
 * NOTE(review): the SSE -> FMA4 ordering suggests later registrations are
 * meant to take precedence; confirm in ff_audio_mix_set_func().
 */
#define SET_MIX_3_8_TO_1_2(chan)                                            \
    if (EXTERNAL_SSE(cpu_flags)) {                                          \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 4, "SSE",                        \
                              ff_mix_ ## chan ## _to_1_fltp_flt_sse);       \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 4, "SSE",                        \
                              ff_mix_ ## chan ## _to_2_fltp_flt_sse);       \
    }                                                                       \
    if (EXTERNAL_SSE2(cpu_flags)) {                                         \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 8, "SSE2",                       \
                              ff_mix_ ## chan ## _to_1_s16p_flt_sse2);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 8, "SSE2",                       \
                              ff_mix_ ## chan ## _to_2_s16p_flt_sse2);      \
    }                                                                       \
    if (EXTERNAL_SSE4(cpu_flags)) {                                         \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 8, "SSE4",                       \
                              ff_mix_ ## chan ## _to_1_s16p_flt_sse4);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 8, "SSE4",                       \
                              ff_mix_ ## chan ## _to_2_s16p_flt_sse4);      \
    }                                                                       \
    if (EXTERNAL_AVX(cpu_flags)) {                                          \
        int ptr_align = 32;                                                 \
        int smp_align = 8;                                                  \
        if (ARCH_X86_32 || (chan) >= 6) {                                   \
            ptr_align = 16;                                                 \
            smp_align = 4;                                                  \
        }                                                                   \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, ptr_align, smp_align, "AVX",         \
                              ff_mix_ ## chan ## _to_1_fltp_flt_avx);       \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, ptr_align, smp_align, "AVX",         \
                              ff_mix_ ## chan ## _to_2_fltp_flt_avx);       \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 8, "AVX",                        \
                              ff_mix_ ## chan ## _to_1_s16p_flt_avx);       \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 8, "AVX",                        \
                              ff_mix_ ## chan ## _to_2_s16p_flt_avx);       \
    }                                                                       \
    if (EXTERNAL_FMA4(cpu_flags)) {                                         \
        int ptr_align = 32;                                                 \
        int smp_align = 8;                                                  \
        if (ARCH_X86_32 || (chan) >= 6) {                                   \
            ptr_align = 16;                                                 \
            smp_align = 4;                                                  \
        }                                                                   \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, ptr_align, smp_align, "FMA4",        \
                              ff_mix_ ## chan ## _to_1_fltp_flt_fma4);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, ptr_align, smp_align, "FMA4",        \
                              ff_mix_ ## chan ## _to_2_fltp_flt_fma4);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 1, 16, 8, "FMA4",                       \
                              ff_mix_ ## chan ## _to_1_s16p_flt_fma4);      \
        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\
                              chan, 2, 16, 8, "FMA4",                       \
                              ff_mix_ ## chan ## _to_2_s16p_flt_fma4);      \
    }

ff_audio_mix_init_x86(AudioMix * am)174 av_cold void ff_audio_mix_init_x86(AudioMix *am)
175 {
176     int cpu_flags = av_get_cpu_flags();
177 
178     if (EXTERNAL_SSE(cpu_flags)) {
179         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
180                               2, 1, 16, 8, "SSE", ff_mix_2_to_1_fltp_flt_sse);
181         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
182                               1, 2, 16, 4, "SSE", ff_mix_1_to_2_fltp_flt_sse);
183     }
184     if (EXTERNAL_SSE2(cpu_flags)) {
185         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
186                               2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2);
187         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8,
188                               2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_q8_sse2);
189         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
190                               1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2);
191     }
192     if (EXTERNAL_SSE4(cpu_flags)) {
193         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
194                               2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4);
195         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
196                               1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4);
197     }
198     if (EXTERNAL_AVX_FAST(cpu_flags)) {
199         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
200                               2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx);
201         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
202                               1, 2, 32, 8, "AVX", ff_mix_1_to_2_fltp_flt_avx);
203     }
204     if (EXTERNAL_AVX(cpu_flags)) {
205         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
206                               1, 2, 16, 8, "AVX", ff_mix_1_to_2_s16p_flt_avx);
207     }
208 
209     SET_MIX_3_8_TO_1_2(3)
210     SET_MIX_3_8_TO_1_2(4)
211     SET_MIX_3_8_TO_1_2(5)
212     SET_MIX_3_8_TO_1_2(6)
213     SET_MIX_3_8_TO_1_2(7)
214     SET_MIX_3_8_TO_1_2(8)
215 }
216