1 /*
2 * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include "config.h"
22 #include "libavutil/cpu.h"
23 #include "libavutil/x86/cpu.h"
24 #include "libavresample/audio_mix.h"
25
/*
 * Prototypes for the fixed-layout mixing kernels (2 -> 1 and 1 -> 2 channels).
 * Only the declarations live in this file; the definitions are provided
 * elsewhere (presumably by the x86 assembly sources -- confirm against the
 * build files).
 *
 * Symbol naming, as used by the registration calls in
 * ff_audio_mix_init_x86():
 *   ff_mix_<in>_to_<out>_<sample format>_<coefficient type>_<cpu extension>
 */

/* planar float samples, float coefficients */
void ff_mix_2_to_1_fltp_flt_sse(float **src, float **matrix,
                                int len, int out_ch, int in_ch);
void ff_mix_2_to_1_fltp_flt_avx(float **src, float **matrix,
                                int len, int out_ch, int in_ch);
void ff_mix_1_to_2_fltp_flt_sse(float **src, float **matrix,
                                int len, int out_ch, int in_ch);
void ff_mix_1_to_2_fltp_flt_avx(float **src, float **matrix,
                                int len, int out_ch, int in_ch);

/* planar signed 16-bit samples, float coefficients */
void ff_mix_2_to_1_s16p_flt_sse2(int16_t **src, float **matrix,
                                 int len, int out_ch, int in_ch);
void ff_mix_2_to_1_s16p_flt_sse4(int16_t **src, float **matrix,
                                 int len, int out_ch, int in_ch);
void ff_mix_1_to_2_s16p_flt_sse2(int16_t **src, float **matrix,
                                 int len, int out_ch, int in_ch);
void ff_mix_1_to_2_s16p_flt_sse4(int16_t **src, float **matrix,
                                 int len, int out_ch, int in_ch);
void ff_mix_1_to_2_s16p_flt_avx(int16_t **src, float **matrix,
                                int len, int out_ch, int in_ch);

/* planar signed 16-bit samples, Q8 (int16_t) coefficients */
void ff_mix_2_to_1_s16p_q8_sse2(int16_t **src, int16_t **matrix,
                                int len, int out_ch, int in_ch);
50
/*
 * Declare the <chan>-to-1 and <chan>-to-2 kernels for one combination of
 * sample format and cpu extension. All of these kernels take float
 * coefficients, hence the fixed "_flt_" part of the symbol name.
 *
 *   chan        number of input channels (pasted into the symbol name)
 *   fmt         sample format tag used in the symbol name (fltp or s16p)
 *   sample_type C element type of the src planes
 *   ext         cpu extension suffix of the symbol name
 */
#define DECLARE_MIX_TO_1_2(chan, fmt, sample_type, ext)                       \
void ff_mix_ ## chan ## _to_1_ ## fmt ## _flt_ ## ext(sample_type **src,      \
                                                      float **matrix,         \
                                                      int len,                \
                                                      int out_ch, int in_ch); \
void ff_mix_ ## chan ## _to_2_ ## fmt ## _flt_ ## ext(sample_type **src,      \
                                                      float **matrix,         \
                                                      int len,                \
                                                      int out_ch, int in_ch);

/*
 * Declare every kernel variant that exists for a given input channel count.
 * Despite the name, this only emits prototypes; nothing is defined here.
 * The expansion ends in a semicolon, so invocations take none.
 */
#define DEFINE_MIX_3_8_TO_1_2(chan)                                           \
DECLARE_MIX_TO_1_2(chan, fltp, float,   sse)                                  \
DECLARE_MIX_TO_1_2(chan, s16p, int16_t, sse2)                                 \
DECLARE_MIX_TO_1_2(chan, s16p, int16_t, sse4)                                 \
DECLARE_MIX_TO_1_2(chan, fltp, float,   avx)                                  \
DECLARE_MIX_TO_1_2(chan, s16p, int16_t, avx)                                  \
DECLARE_MIX_TO_1_2(chan, fltp, float,   fma4)                                 \
DECLARE_MIX_TO_1_2(chan, s16p, int16_t, fma4)

/* Prototypes for 3 through 8 input channels. */
DEFINE_MIX_3_8_TO_1_2(3)
DEFINE_MIX_3_8_TO_1_2(4)
DEFINE_MIX_3_8_TO_1_2(5)
DEFINE_MIX_3_8_TO_1_2(6)
DEFINE_MIX_3_8_TO_1_2(7)
DEFINE_MIX_3_8_TO_1_2(8)
107
/*
 * Register the <chan>-to-1 and <chan>-to-2 float-coefficient kernels of one
 * sample format / cpu extension pair. Expects `am` to be in scope at the
 * point of expansion.
 *
 *   chan        number of input channels
 *   fmt         sample format tag used in the symbol name (fltp or s16p)
 *   sample_fmt  matching AV_SAMPLE_FMT_* constant
 *   ptr_al      pointer alignment argument forwarded to ff_audio_mix_set_func
 *   smp_al      sample alignment argument forwarded to ff_audio_mix_set_func
 *   descr       description string forwarded to ff_audio_mix_set_func
 *   ext         cpu extension suffix of the symbol name
 */
#define SET_MIX_TO_1_2(chan, fmt, sample_fmt, ptr_al, smp_al, descr, ext)     \
    ff_audio_mix_set_func(am, sample_fmt, AV_MIX_COEFF_TYPE_FLT,              \
                          chan, 1, ptr_al, smp_al, descr,                     \
                          ff_mix_ ## chan ## _to_1_ ## fmt ## _flt_ ## ext);  \
    ff_audio_mix_set_func(am, sample_fmt, AV_MIX_COEFF_TYPE_FLT,              \
                          chan, 2, ptr_al, smp_al, descr,                     \
                          ff_mix_ ## chan ## _to_2_ ## fmt ## _flt_ ## ext);

/*
 * Register all kernels for `chan` input channels that the CPU described by
 * `cpu_flags` supports. Expects `am` and `cpu_flags` to be in scope.
 *
 * Registration order is SSE, SSE2, SSE4, AVX, FMA4. The AVX and FMA4 float
 * kernels are registered with 32/8 alignment, reduced to 16/4 on 32-bit x86
 * or when chan >= 6; the s16p kernels always use 16/8.
 *
 * NOTE(review): the 2->1 / 1->2 AVX float kernels in ff_audio_mix_init_x86()
 * are gated on EXTERNAL_AVX_FAST, while this macro uses EXTERNAL_AVX --
 * confirm the difference is intentional.
 */
#define SET_MIX_3_8_TO_1_2(chan)                                              \
    if (EXTERNAL_SSE(cpu_flags)) {                                            \
        SET_MIX_TO_1_2(chan, fltp, AV_SAMPLE_FMT_FLTP, 16, 4, "SSE", sse)     \
    }                                                                         \
    if (EXTERNAL_SSE2(cpu_flags)) {                                           \
        SET_MIX_TO_1_2(chan, s16p, AV_SAMPLE_FMT_S16P, 16, 8, "SSE2", sse2)   \
    }                                                                         \
    if (EXTERNAL_SSE4(cpu_flags)) {                                           \
        SET_MIX_TO_1_2(chan, s16p, AV_SAMPLE_FMT_S16P, 16, 8, "SSE4", sse4)   \
    }                                                                         \
    if (EXTERNAL_AVX(cpu_flags)) {                                            \
        const int narrow    = ARCH_X86_32 || chan >= 6;                       \
        const int ptr_align = narrow ? 16 : 32;                               \
        const int smp_align = narrow ?  4 :  8;                               \
        SET_MIX_TO_1_2(chan, fltp, AV_SAMPLE_FMT_FLTP,                        \
                       ptr_align, smp_align, "AVX", avx)                      \
        SET_MIX_TO_1_2(chan, s16p, AV_SAMPLE_FMT_S16P, 16, 8, "AVX", avx)     \
    }                                                                         \
    if (EXTERNAL_FMA4(cpu_flags)) {                                           \
        const int narrow    = ARCH_X86_32 || chan >= 6;                       \
        const int ptr_align = narrow ? 16 : 32;                               \
        const int smp_align = narrow ?  4 :  8;                               \
        SET_MIX_TO_1_2(chan, fltp, AV_SAMPLE_FMT_FLTP,                        \
                       ptr_align, smp_align, "FMA4", fma4)                    \
        SET_MIX_TO_1_2(chan, s16p, AV_SAMPLE_FMT_S16P, 16, 8, "FMA4", fma4)   \
    }
173
ff_audio_mix_init_x86(AudioMix * am)174 av_cold void ff_audio_mix_init_x86(AudioMix *am)
175 {
176 int cpu_flags = av_get_cpu_flags();
177
178 if (EXTERNAL_SSE(cpu_flags)) {
179 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
180 2, 1, 16, 8, "SSE", ff_mix_2_to_1_fltp_flt_sse);
181 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
182 1, 2, 16, 4, "SSE", ff_mix_1_to_2_fltp_flt_sse);
183 }
184 if (EXTERNAL_SSE2(cpu_flags)) {
185 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
186 2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2);
187 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8,
188 2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_q8_sse2);
189 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
190 1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2);
191 }
192 if (EXTERNAL_SSE4(cpu_flags)) {
193 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
194 2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4);
195 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
196 1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4);
197 }
198 if (EXTERNAL_AVX_FAST(cpu_flags)) {
199 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
200 2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx);
201 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
202 1, 2, 32, 8, "AVX", ff_mix_1_to_2_fltp_flt_avx);
203 }
204 if (EXTERNAL_AVX(cpu_flags)) {
205 ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
206 1, 2, 16, 8, "AVX", ff_mix_1_to_2_s16p_flt_avx);
207 }
208
209 SET_MIX_3_8_TO_1_2(3)
210 SET_MIX_3_8_TO_1_2(4)
211 SET_MIX_3_8_TO_1_2(5)
212 SET_MIX_3_8_TO_1_2(6)
213 SET_MIX_3_8_TO_1_2(7)
214 SET_MIX_3_8_TO_1_2(8)
215 }
216