/**
 * \file pcm/pcm_dmix_x86_64.h
 * \ingroup PCM_Plugins
 * \brief PCM Direct Stream Mixing (dmix) Plugin Interface - X86-64 assembler code
 * \author Takashi Iwai <tiwai@suse.de>
 * \date 2003
 */
/*
 *  PCM - Direct Stream Mixing
 *  Copyright (c) 2003 by Jaroslav Kysela <perex@perex.cz>
 *                        Takashi Iwai <tiwai@suse.de>
 *
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as
 *   published by the Free Software Foundation; either version 2.1 of
 *   the License, or (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public
 *   License along with this library; if not, write to the Free Software
 *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */

/*
 * BOUNDED_RBX selects how the mixing routines in this file treat RBX:
 * when it is defined they save and restore RBX by hand inside the asm
 * statement and leave it out of the clobber list; otherwise "rbx" is
 * declared as a clobber.  It is enabled for GCC releases older than 5 when
 * compiling position-independent code (presumably because those compilers
 * do not accept the register as a clobber in PIC mode - verify against the
 * toolchains still supported).
 */
#if defined(__GNUC__) && __GNUC__ < 5 && defined(__PIC__)
#  define BOUNDED_RBX
#endif

34 /*
35  *  MMX optimized
36  */
MIX_AREAS_16(unsigned int size,volatile signed short * dst,signed short * src,volatile signed int * sum,size_t dst_step,size_t src_step,size_t sum_step)37 static void MIX_AREAS_16(unsigned int size,
38 			 volatile signed short *dst, signed short *src,
39 			 volatile signed int *sum, size_t dst_step,
40 			 size_t src_step, size_t sum_step)
41 {
42 #ifdef BOUNDED_RBX
43 	unsigned long long old_rbx;
44 #endif
45 	/*
46 	 *  RSI - src
47 	 *  RDI - dst
48 	 *  RBX - sum
49 	 *  ECX - old sample
50 	 *  EAX - sample / temporary
51 	 *  EDX - temporary
52 	 */
53 	__asm__ __volatile__ (
54 		"\n"
55 #ifdef BOUNDED_RBX
56 		"\tmovq %%rbx, %[old_rbx]\n"
57 #endif
58 		/*
59 		 *  initialization, load RSI, RDI, RBX registers
60 		 */
61 #ifndef _ILP32
62 		"\tmovq %[dst], %%rdi\n"
63 		"\tmovq %[src], %%rsi\n"
64 		"\tmovq %[sum], %%rbx\n"
65 #else
66 		"\tmovl %[dst], %%edi\n"
67 		"\tmovl %[src], %%esi\n"
68 		"\tmovl %[sum], %%ebx\n"
69 #endif
70 
71 		/*
72 		 * while (size-- > 0) {
73 		 */
74 		"\tcmpl $0, %[size]\n"
75 		"jz 6f\n"
76 
77 		"\t.p2align 4,,15\n"
78 
79 		"1:"
80 
81 		/*
82 		 *   sample = *src;
83 		 *   sum_sample = *sum;
84 		 *   if (cmpxchg(*dst, 0, 1) == 0)
85 		 *     sample -= sum_sample;
86 		 *   xadd(*sum, sample);
87 		 */
88 		"\tmovw $0, %%ax\n"
89 		"\tmovw $1, %%cx\n"
90 		"\tmovl (%%rbx), %%edx\n"
91 		"\t" LOCK_PREFIX "cmpxchgw %%cx, (%%rdi)\n"
92 		"\tmovswl (%%rsi), %%ecx\n"
93 		"\tjnz 2f\n"
94 		"\t" XSUB " %%edx, %%ecx\n"
95 		"2:"
96 		"\t" LOCK_PREFIX XADD " %%ecx, (%%rbx)\n"
97 
98 		/*
99 		 *   do {
100 		 *     sample = old_sample = *sum;
101 		 *     saturate(v);
102 		 *     *dst = sample;
103 		 *   } while (v != *sum);
104 		 */
105 
106 		"3:"
107 		"\tmovl (%%rbx), %%ecx\n"
108 		"\tmovd %%ecx, %%mm0\n"
109 		"\tpackssdw %%mm1, %%mm0\n"
110 		"\tmovd %%mm0, %%eax\n"
111 		"\tmovw %%ax, (%%rdi)\n"
112 		"\tcmpl %%ecx, (%%rbx)\n"
113 		"\tjnz 3b\n"
114 
115 		/*
116 		 * while (size-- > 0)
117 		 */
118 #ifndef _ILP32
119 		"\taddq %[dst_step], %%rdi\n"
120 		"\taddq %[src_step], %%rsi\n"
121 		"\taddq %[sum_step], %%rbx\n"
122 #else
123 		"\taddl %[dst_step], %%edi\n"
124 		"\taddl %[src_step], %%esi\n"
125 		"\taddl %[sum_step], %%ebx\n"
126 #endif
127 		"\tdecl %[size]\n"
128 		"\tjnz 1b\n"
129 
130 		"6:"
131 
132 		"\temms\n"
133 #ifdef BOUNDED_RBX
134 		"\tmovq %[old_rbx], %%rbx\n"
135 #endif
136 		: [size] "+&rm" (size)
137 #ifdef BOUNDED_RBX
138 		  , [old_rbx] "=m" (old_rbx)
139 #endif
140 	        : [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
141 		  [dst_step] "im" (dst_step),  [src_step] "im" (src_step),
142 		  [sum_step] "im" (sum_step)
143 		: "rsi", "rdi", "edx", "ecx", "eax", "memory", "cc"
144 #ifndef BOUNDED_RBX
145 		  , "rbx"
146 #endif
147 #ifdef HAVE_MMX
148 		  , "mm0"
149 #else
150 		  , "st", "st(1)", "st(2)", "st(3)",
151 		  "st(4)", "st(5)", "st(6)", "st(7)"
152 #endif
153 	);
154 }
155 
156 /*
157  *  32-bit version (24-bit resolution)
158  */
MIX_AREAS_32(unsigned int size,volatile signed int * dst,signed int * src,volatile signed int * sum,size_t dst_step,size_t src_step,size_t sum_step)159 static void MIX_AREAS_32(unsigned int size,
160 			 volatile signed int *dst, signed int *src,
161 			 volatile signed int *sum, size_t dst_step,
162 			 size_t src_step, size_t sum_step)
163 {
164 #ifdef BOUNDED_RBX
165 	unsigned long long old_rbx;
166 #endif
167 	/*
168 	 *  RSI - src
169 	 *  RDI - dst
170 	 *  RBX - sum
171 	 *  ECX - old sample
172 	 *  EAX - sample / temporary
173 	 *  EDX - temporary
174 	 */
175 	__asm__ __volatile__ (
176 		"\n"
177 #ifdef BOUNDED_RBX
178 		"\tmovq %%rbx, %[old_rbx]\n"
179 #endif
180 		/*
181 		 *  initialization, load RSI, RDI, RBX registers
182 		 */
183 #ifndef _ILP32
184 		"\tmovq %[dst], %%rdi\n"
185 		"\tmovq %[src], %%rsi\n"
186 		"\tmovq %[sum], %%rbx\n"
187 #else
188 		"\tmovl %[dst], %%edi\n"
189 		"\tmovl %[src], %%esi\n"
190 		"\tmovl %[sum], %%ebx\n"
191 #endif
192 
193 		/*
194 		 * while (size-- > 0) {
195 		 */
196 		"\tcmpl $0, %[size]\n"
197 		"jz 6f\n"
198 
199 		"\t.p2align 4,,15\n"
200 
201 		"1:"
202 
203 		/*
204 		 *   sample = *src;
205 		 *   sum_sample = *sum;
206 		 *   if (cmpxchg(*dst, 0, 1) == 0)
207 		 *     sample -= sum_sample;
208 		 *   xadd(*sum, sample);
209 		 */
210 		"\tmovl $0, %%eax\n"
211 		"\tmovl $1, %%ecx\n"
212 		"\tmovl (%%rbx), %%edx\n"
213 		"\t" LOCK_PREFIX "cmpxchgl %%ecx, (%%rdi)\n"
214 		"\tjnz 2f\n"
215 		"\tmovl (%%rsi), %%ecx\n"
216 		/* sample >>= 8 */
217 		"\tsarl $8, %%ecx\n"
218 		"\t" XSUB " %%edx, %%ecx\n"
219 		"\tjmp 21f\n"
220 		"2:"
221 		"\tmovl (%%rsi), %%ecx\n"
222 		/* sample >>= 8 */
223 		"\tsarl $8, %%ecx\n"
224 		"21:"
225 		"\t" LOCK_PREFIX XADD " %%ecx, (%%rbx)\n"
226 
227 		/*
228 		 *   do {
229 		 *     sample = old_sample = *sum;
230 		 *     saturate(v);
231 		 *     *dst = sample;
232 		 *   } while (v != *sum);
233 		 */
234 
235 		"3:"
236 		"\tmovl (%%rbx), %%ecx\n"
237 		/*
238 		 *  if (sample > 0x7fff00)
239 		 */
240 		"\tmovl $0x7fffff, %%eax\n"
241 		"\tcmpl %%eax, %%ecx\n"
242 		"\tjg 4f\n"
243 		/*
244 		 *  if (sample < -0x800000)
245 		 */
246 		"\tmovl $-0x800000, %%eax\n"
247 		"\tcmpl %%eax, %%ecx\n"
248 		"\tjl 4f\n"
249 		"\tmovl %%ecx, %%eax\n"
250 		"4:"
251 		/*
252 		 *  sample <<= 8;
253 		 */
254 		"\tsall $8, %%eax\n"
255 		"\tmovl %%eax, (%%rdi)\n"
256 		"\tcmpl %%ecx, (%%rbx)\n"
257 		"\tjnz 3b\n"
258 
259 		/*
260 		 * while (size-- > 0)
261 		 */
262 #ifndef _ILP32
263 		"\taddq %[dst_step], %%rdi\n"
264 		"\taddq %[src_step], %%rsi\n"
265 		"\taddq %[sum_step], %%rbx\n"
266 #else
267 		"\taddl %[dst_step], %%edi\n"
268 		"\taddl %[src_step], %%esi\n"
269 		"\taddl %[sum_step], %%ebx\n"
270 #endif
271 		"\tdecl %[size]\n"
272 		"\tjnz 1b\n"
273 
274 		"6:"
275 #ifdef BOUNDED_RBX
276 		"\tmovq %[old_rbx], %%rbx\n"
277 #endif
278 		: [size] "+&rm" (size)
279 #ifdef BOUNDED_RBX
280 		  , [old_rbx] "=m" (old_rbx)
281 #endif
282 	        : [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
283 		  [dst_step] "im" (dst_step),  [src_step] "im" (src_step),
284 		  [sum_step] "im" (sum_step)
285 		: "rsi", "rdi", "edx", "ecx", "eax", "memory", "cc"
286 #ifndef BOUNDED_RBX
287 		  , "rbx"
288 #endif
289 	);
290 }
291 
292 /*
293  *  24-bit version
294  */
MIX_AREAS_24(unsigned int size,volatile unsigned char * dst,unsigned char * src,volatile signed int * sum,size_t dst_step,size_t src_step,size_t sum_step)295 static void MIX_AREAS_24(unsigned int size,
296 			 volatile unsigned char *dst, unsigned char *src,
297 			 volatile signed int *sum, size_t dst_step,
298 			 size_t src_step, size_t sum_step)
299 {
300 #ifdef BOUNDED_RBX
301 	unsigned long long old_rbx;
302 #endif
303 	/*
304 	 *  RSI - src
305 	 *  RDI - dst
306 	 *  RBX - sum
307 	 *  ECX - old sample
308 	 *  EAX - sample / temporary
309 	 *  EDX - temporary
310 	 */
311 	__asm__ __volatile__ (
312 		"\n"
313 #ifdef BOUNDED_RBX
314 		"\tmovq %%rbx, %[old_rbx]\n"
315 #endif
316 		/*
317 		 *  initialization, load RSI, RDI, RBX registers
318 		 */
319 #ifndef _ILP32
320 		"\tmovq %[dst], %%rdi\n"
321 		"\tmovq %[src], %%rsi\n"
322 		"\tmovq %[sum], %%rbx\n"
323 #else
324 		"\tmovl %[dst], %%edi\n"
325 		"\tmovl %[src], %%esi\n"
326 		"\tmovl %[sum], %%ebx\n"
327 #endif
328 
329 		/*
330 		 * while (size-- > 0) {
331 		 */
332 		"\tcmpl $0, %[size]\n"
333 		"jz 6f\n"
334 
335 		"\t.p2align 4,,15\n"
336 
337 		"1:"
338 
339 		/*
340 		 *   sample = *src;
341 		 *   sum_sample = *sum;
342 		 *   if (test_and_set_bit(0, dst) == 0)
343 		 *     sample -= sum_sample;
344 		 *   *sum += sample;
345 		 */
346 		"\tmovsbl 2(%%rsi), %%eax\n"
347 		"\tmovzwl (%%rsi), %%ecx\n"
348 		"\tmovl (%%rbx), %%edx\n"
349 		"\tsall $16, %%eax\n"
350 		"\torl %%eax, %%ecx\n"
351 		"\t" LOCK_PREFIX "btsw $0, (%%rdi)\n"
352 		"\tjc 2f\n"
353 		"\t" XSUB " %%edx, %%ecx\n"
354 		"2:"
355 		"\t" LOCK_PREFIX XADD " %%ecx, (%%rbx)\n"
356 
357 		/*
358 		 *   do {
359 		 *     sample = old_sample = *sum;
360 		 *     saturate(sample);
361 		 *     *dst = sample | 1;
362 		 *   } while (old_sample != *sum);
363 		 */
364 
365 		"3:"
366 		"\tmovl (%%rbx), %%ecx\n"
367 
368 		"\tmovl $0x7fffff, %%eax\n"
369 		"\tmovl $-0x7fffff, %%edx\n"
370 		"\tcmpl %%eax, %%ecx\n"
371 		"\tcmovng %%ecx, %%eax\n"
372 		"\tcmpl %%edx, %%ecx\n"
373 		"\tcmovl %%edx, %%eax\n"
374 
375 		"\torl $1, %%eax\n"
376 		"\tmovw %%ax, (%%rdi)\n"
377 		"\tshrl $16, %%eax\n"
378 		"\tmovb %%al, 2(%%rdi)\n"
379 
380 		"\tcmpl %%ecx, (%%rbx)\n"
381 		"\tjnz 3b\n"
382 
383 		/*
384 		 * while (size-- > 0)
385 		 */
386 #ifndef _ILP32
387 		"\taddq %[dst_step], %%rdi\n"
388 		"\taddq %[src_step], %%rsi\n"
389 		"\taddq %[sum_step], %%rbx\n"
390 #else
391 		"\taddl %[dst_step], %%edi\n"
392 		"\taddl %[src_step], %%esi\n"
393 		"\taddl %[sum_step], %%ebx\n"
394 #endif
395 		"\tdecl %[size]\n"
396 		"\tjnz 1b\n"
397 
398 		"6:"
399 #ifdef BOUNDED_RBX
400 		"\tmovq %[old_rbx], %%rbx\n"
401 #endif
402 		: [size] "+&rm" (size)
403 #ifdef BOUNDED_RBX
404 		  , [old_rbx] "=m" (old_rbx)
405 #endif
406 	        : [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
407 		  [dst_step] "im" (dst_step),  [src_step] "im" (src_step),
408 		  [sum_step] "im" (sum_step)
409 		: "rsi", "rdi", "edx", "ecx", "eax", "memory", "cc"
410 #ifndef BOUNDED_RBX
411 		  , "rbx"
412 #endif
413 	);
414 }
415 
/* BOUNDED_RBX is private to this header; do not leak it to the includer. */
#ifdef BOUNDED_RBX
#  undef BOUNDED_RBX
#endif