/*
 *
 *  Bluetooth low-complexity, subband codec (SBC) library
 *
 *  Copyright (C) 2008-2010  Nokia Corporation
 *  Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
 *  Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
 *  Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
 *
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */

#include <stdint.h>
#include <limits.h>
#include "sbc.h"
#include "sbc_math.h"
#include "sbc_tables.h"

#include "sbc_primitives_mmx.h"

/*
 * MMX optimizations
 */

#ifdef SBC_BUILD_WITH_MMX_SUPPORT

static inline void sbc_analyze_four_mmx(const int16_t *in, int32_t *out,
					const FIXED_T *consts)
{
	static const SBC_ALIGNED int32_t round_c[2] = {
		1 << (SBC_PROTO_FIXED4_SCALE - 1),
		1 << (SBC_PROTO_FIXED4_SCALE - 1),
	};
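	/*
	 * %0 = in (40 16-bit samples), %1 = consts, %2 = rounding constant,
	 * %3 = out, %4 = SBC_PROTO_FIXED4_SCALE.  Each pmaddwd multiplies
	 * four 16-bit values pairwise and adds the products into two 32-bit
	 * lanes, so the five load/multiply/add groups below accumulate the
	 * 40-tap dot products in %%mm0/%%mm1.
	 */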
	asm volatile (
		"movq        (%0), %%mm0\n"
		"movq       8(%0), %%mm1\n"
		"pmaddwd     (%1), %%mm0\n"
		"pmaddwd    8(%1), %%mm1\n"
		"paddd       (%2), %%mm0\n"
		"paddd       (%2), %%mm1\n"
		"\n"
		"movq      16(%0), %%mm2\n"
		"movq      24(%0), %%mm3\n"
		"pmaddwd   16(%1), %%mm2\n"
		"pmaddwd   24(%1), %%mm3\n"
		"paddd      %%mm2, %%mm0\n"
		"paddd      %%mm3, %%mm1\n"
		"\n"
		"movq      32(%0), %%mm2\n"
		"movq      40(%0), %%mm3\n"
		"pmaddwd   32(%1), %%mm2\n"
		"pmaddwd   40(%1), %%mm3\n"
		"paddd      %%mm2, %%mm0\n"
		"paddd      %%mm3, %%mm1\n"
		"\n"
		"movq      48(%0), %%mm2\n"
		"movq      56(%0), %%mm3\n"
		"pmaddwd   48(%1), %%mm2\n"
		"pmaddwd   56(%1), %%mm3\n"
		"paddd      %%mm2, %%mm0\n"
		"paddd      %%mm3, %%mm1\n"
		"\n"
		"movq      64(%0), %%mm2\n"
		"movq      72(%0), %%mm3\n"
		"pmaddwd   64(%1), %%mm2\n"
		"pmaddwd   72(%1), %%mm3\n"
		"paddd      %%mm2, %%mm0\n"
		"paddd      %%mm3, %%mm1\n"
		"\n"
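		/* Drop the fractional bits (the rounding term was added with
		 * the first paddd) and saturate to 16 bits for stage two */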
		"psrad         %4, %%mm0\n"
		"psrad         %4, %%mm1\n"
		"packssdw   %%mm0, %%mm0\n"
		"packssdw   %%mm1, %%mm1\n"
		"\n"
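		/* Stage two: multiply the packed sums by the constants at
		 * byte offsets 80..104 and accumulate the four 32-bit
		 * subband outputs */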
		"movq       %%mm0, %%mm2\n"
		"pmaddwd   80(%1), %%mm0\n"
		"pmaddwd   88(%1), %%mm2\n"
		"\n"
		"movq       %%mm1, %%mm3\n"
		"pmaddwd   96(%1), %%mm1\n"
		"pmaddwd  104(%1), %%mm3\n"
		"paddd      %%mm1, %%mm0\n"
		"paddd      %%mm3, %%mm2\n"
		"\n"
		"movq       %%mm0, (%3)\n"
		"movq       %%mm2, 8(%3)\n"
		:
		: "r" (in), "r" (consts), "r" (&round_c), "r" (out),
			"i" (SBC_PROTO_FIXED4_SCALE)
		: "cc", "memory");
}

static inline void sbc_analyze_eight_mmx(const int16_t *in, int32_t *out,
							const FIXED_T *consts)
{
	static const SBC_ALIGNED int32_t round_c[2] = {
		1 << (SBC_PROTO_FIXED8_SCALE - 1),
		1 << (SBC_PROTO_FIXED8_SCALE - 1),
	};
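	/*
	 * Same layout as sbc_analyze_four_mmx, but for the eight-subband
	 * filter: the dot products over 80 input samples are accumulated
	 * into %%mm0..%%mm3, shifted by SBC_PROTO_FIXED8_SCALE, packed to
	 * 16 bits and then run through the second-stage constants in two
	 * passes.
	 */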
	asm volatile (
		"movq        (%0), %%mm0\n"
		"movq       8(%0), %%mm1\n"
		"movq      16(%0), %%mm2\n"
		"movq      24(%0), %%mm3\n"
		"pmaddwd     (%1), %%mm0\n"
		"pmaddwd    8(%1), %%mm1\n"
		"pmaddwd   16(%1), %%mm2\n"
		"pmaddwd   24(%1), %%mm3\n"
		"paddd       (%2), %%mm0\n"
		"paddd       (%2), %%mm1\n"
		"paddd       (%2), %%mm2\n"
		"paddd       (%2), %%mm3\n"
		"\n"
		"movq      32(%0), %%mm4\n"
		"movq      40(%0), %%mm5\n"
		"movq      48(%0), %%mm6\n"
		"movq      56(%0), %%mm7\n"
		"pmaddwd   32(%1), %%mm4\n"
		"pmaddwd   40(%1), %%mm5\n"
		"pmaddwd   48(%1), %%mm6\n"
		"pmaddwd   56(%1), %%mm7\n"
		"paddd      %%mm4, %%mm0\n"
		"paddd      %%mm5, %%mm1\n"
		"paddd      %%mm6, %%mm2\n"
		"paddd      %%mm7, %%mm3\n"
		"\n"
		"movq      64(%0), %%mm4\n"
		"movq      72(%0), %%mm5\n"
		"movq      80(%0), %%mm6\n"
		"movq      88(%0), %%mm7\n"
		"pmaddwd   64(%1), %%mm4\n"
		"pmaddwd   72(%1), %%mm5\n"
		"pmaddwd   80(%1), %%mm6\n"
		"pmaddwd   88(%1), %%mm7\n"
		"paddd      %%mm4, %%mm0\n"
		"paddd      %%mm5, %%mm1\n"
		"paddd      %%mm6, %%mm2\n"
		"paddd      %%mm7, %%mm3\n"
		"\n"
		"movq      96(%0), %%mm4\n"
		"movq     104(%0), %%mm5\n"
		"movq     112(%0), %%mm6\n"
		"movq     120(%0), %%mm7\n"
		"pmaddwd   96(%1), %%mm4\n"
		"pmaddwd  104(%1), %%mm5\n"
		"pmaddwd  112(%1), %%mm6\n"
		"pmaddwd  120(%1), %%mm7\n"
		"paddd      %%mm4, %%mm0\n"
		"paddd      %%mm5, %%mm1\n"
		"paddd      %%mm6, %%mm2\n"
		"paddd      %%mm7, %%mm3\n"
		"\n"
		"movq     128(%0), %%mm4\n"
		"movq     136(%0), %%mm5\n"
		"movq     144(%0), %%mm6\n"
		"movq     152(%0), %%mm7\n"
		"pmaddwd  128(%1), %%mm4\n"
		"pmaddwd  136(%1), %%mm5\n"
		"pmaddwd  144(%1), %%mm6\n"
		"pmaddwd  152(%1), %%mm7\n"
		"paddd      %%mm4, %%mm0\n"
		"paddd      %%mm5, %%mm1\n"
		"paddd      %%mm6, %%mm2\n"
		"paddd      %%mm7, %%mm3\n"
		"\n"
		"psrad         %4, %%mm0\n"
		"psrad         %4, %%mm1\n"
		"psrad         %4, %%mm2\n"
		"psrad         %4, %%mm3\n"
		"\n"
		"packssdw   %%mm0, %%mm0\n"
		"packssdw   %%mm1, %%mm1\n"
		"packssdw   %%mm2, %%mm2\n"
		"packssdw   %%mm3, %%mm3\n"
		"\n"
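		/* First pass of stage two: produces outputs 0..3 */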
		"movq       %%mm0, %%mm4\n"
		"movq       %%mm0, %%mm5\n"
		"pmaddwd  160(%1), %%mm4\n"
		"pmaddwd  168(%1), %%mm5\n"
		"\n"
		"movq       %%mm1, %%mm6\n"
		"movq       %%mm1, %%mm7\n"
		"pmaddwd  192(%1), %%mm6\n"
		"pmaddwd  200(%1), %%mm7\n"
		"paddd      %%mm6, %%mm4\n"
		"paddd      %%mm7, %%mm5\n"
		"\n"
		"movq       %%mm2, %%mm6\n"
		"movq       %%mm2, %%mm7\n"
		"pmaddwd  224(%1), %%mm6\n"
		"pmaddwd  232(%1), %%mm7\n"
		"paddd      %%mm6, %%mm4\n"
		"paddd      %%mm7, %%mm5\n"
		"\n"
		"movq       %%mm3, %%mm6\n"
		"movq       %%mm3, %%mm7\n"
		"pmaddwd  256(%1), %%mm6\n"
		"pmaddwd  264(%1), %%mm7\n"
		"paddd      %%mm6, %%mm4\n"
		"paddd      %%mm7, %%mm5\n"
		"\n"
		"movq       %%mm4, (%3)\n"
		"movq       %%mm5, 8(%3)\n"
		"\n"
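		/* Second pass with the remaining constants: outputs 4..7 */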
		"movq       %%mm0, %%mm5\n"
		"pmaddwd  176(%1), %%mm0\n"
		"pmaddwd  184(%1), %%mm5\n"
		"\n"
		"movq       %%mm1, %%mm7\n"
		"pmaddwd  208(%1), %%mm1\n"
		"pmaddwd  216(%1), %%mm7\n"
		"paddd      %%mm1, %%mm0\n"
		"paddd      %%mm7, %%mm5\n"
		"\n"
		"movq       %%mm2, %%mm7\n"
		"pmaddwd  240(%1), %%mm2\n"
		"pmaddwd  248(%1), %%mm7\n"
		"paddd      %%mm2, %%mm0\n"
		"paddd      %%mm7, %%mm5\n"
		"\n"
		"movq       %%mm3, %%mm7\n"
		"pmaddwd  272(%1), %%mm3\n"
		"pmaddwd  280(%1), %%mm7\n"
		"paddd      %%mm3, %%mm0\n"
		"paddd      %%mm7, %%mm5\n"
		"\n"
		"movq       %%mm0, 16(%3)\n"
		"movq       %%mm5, 24(%3)\n"
		:
		: "r" (in), "r" (consts), "r" (&round_c), "r" (out),
			"i" (SBC_PROTO_FIXED8_SCALE)
		: "cc", "memory");
}

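/*
 * Four blocks are analysed per call; the input window into x steps down
 * by four samples per block and alternates between the odd and even
 * constant tables.
 */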
static inline void sbc_analyze_4b_4s_mmx(int16_t *x, int32_t *out,
						int out_stride)
{
	/* Analyze blocks */
	sbc_analyze_four_mmx(x + 12, out, analysis_consts_fixed4_simd_odd);
	out += out_stride;
	sbc_analyze_four_mmx(x + 8, out, analysis_consts_fixed4_simd_even);
	out += out_stride;
	sbc_analyze_four_mmx(x + 4, out, analysis_consts_fixed4_simd_odd);
	out += out_stride;
	sbc_analyze_four_mmx(x + 0, out, analysis_consts_fixed4_simd_even);

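	/* Leave MMX state so that any following x87 FPU code sees a clean
	 * register stack */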
	asm volatile ("emms\n");
}

static inline void sbc_analyze_4b_8s_mmx(int16_t *x, int32_t *out,
						int out_stride)
{
	/* Analyze blocks */
	sbc_analyze_eight_mmx(x + 24, out, analysis_consts_fixed8_simd_odd);
	out += out_stride;
	sbc_analyze_eight_mmx(x + 16, out, analysis_consts_fixed8_simd_even);
	out += out_stride;
	sbc_analyze_eight_mmx(x + 8, out, analysis_consts_fixed8_simd_odd);
	out += out_stride;
	sbc_analyze_eight_mmx(x + 0, out, analysis_consts_fixed8_simd_even);

	asm volatile ("emms\n");
}

static void sbc_calc_scalefactors_mmx(
	int32_t sb_sample_f[16][2][8],
	uint32_t scale_factor[2][8],
	int blocks, int channels, int subbands)
{
	static const SBC_ALIGNED int32_t consts[2] = {
		1 << SCALE_OUT_BITS,
		1 << SCALE_OUT_BITS,
	};
	int ch, sb;
	intptr_t blk;
	for (ch = 0; ch < channels; ch++) {
		for (sb = 0; sb < subbands; sb += 2) {
			blk = (blocks - 1) * (((char *) &sb_sample_f[1][0][0] -
				(char *) &sb_sample_f[0][0][0]));
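			/*
			 * Two subbands are handled per iteration, one in each
			 * 32-bit MMX lane.  blk starts as the byte offset of
			 * the last block's pair of samples and is stepped
			 * down by one block stride until it goes negative.
			 * The pcmpgtd/paddd/pxor sequence maps each sample x
			 * to |x| - 1 (0 for x == 0) without branching; these
			 * values are ORed over all blocks, bsrl finds the
			 * highest set bit, and subtracting SCALE_OUT_BITS
			 * gives the scale factor.  Pre-loading %%mm0 with
			 * 1 << SCALE_OUT_BITS keeps the bsrl input non-zero.
			 */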
			asm volatile (
				"movq         (%4), %%mm0\n"
			"1:\n"
				"movq     (%1, %0), %%mm1\n"
				"pxor        %%mm2, %%mm2\n"
				"pcmpgtd     %%mm2, %%mm1\n"
				"paddd    (%1, %0), %%mm1\n"
				"pcmpgtd     %%mm1, %%mm2\n"
				"pxor        %%mm2, %%mm1\n"

				"por         %%mm1, %%mm0\n"

				"sub            %2, %0\n"
				"jns            1b\n"

				"movd        %%mm0, %k0\n"
				"psrlq         $32, %%mm0\n"
				"bsrl          %k0, %k0\n"
				"subl           %5, %k0\n"
				"movl          %k0, (%3)\n"

				"movd        %%mm0, %k0\n"
				"bsrl          %k0, %k0\n"
				"subl           %5, %k0\n"
				"movl          %k0, 4(%3)\n"
			: "+r" (blk)
			: "r" (&sb_sample_f[0][ch][sb]),
				"i" ((char *) &sb_sample_f[1][0][0] -
					(char *) &sb_sample_f[0][0][0]),
				"r" (&scale_factor[ch][sb]),
				"r" (&consts),
				"i" (SCALE_OUT_BITS)
			: "cc", "memory");
		}
	}
	asm volatile ("emms\n");
}

static int check_mmx_support(void)
{
#ifdef __amd64__
	return 1; /* We assume that all 64-bit processors have MMX support */
#else
	int cpuid_feature_information;
	asm volatile (
		/* According to Intel manual, CPUID instruction is supported
		 * if the value of ID bit (bit 21) in EFLAGS can be modified */
		"pushf\n"
		"movl     (%%esp),   %0\n"
		"xorl     $0x200000, (%%esp)\n" /* try to modify ID bit */
		"popf\n"
		"pushf\n"
		"xorl     (%%esp),   %0\n"      /* check if ID bit changed */
		"jz       1f\n"
		"push     %%eax\n"
		"push     %%ebx\n"
		"push     %%ecx\n"
		"mov      $1,        %%eax\n"
		"cpuid\n"
		"pop      %%ecx\n"
		"pop      %%ebx\n"
		"pop      %%eax\n"
		"1:\n"
		"popf\n"
		: "=d" (cpuid_feature_information)
		:
		: "cc");
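	/* CPUID leaf 1 reports MMX support in bit 23 of EDX */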
	return cpuid_feature_information & (1 << 23);
#endif
}

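/* Install the MMX implementations when the CPU supports them */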
void sbc_init_primitives_mmx(struct sbc_encoder_state *state)
{
	if (check_mmx_support()) {
		state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_mmx;
		state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_mmx;
		state->sbc_calc_scalefactors = sbc_calc_scalefactors_mmx;
		state->implementation_info = "MMX";
	}
}

#endif