• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * \file pcm/pcm_dmix_i386.h
3  * \ingroup PCM_Plugins
4  * \brief PCM Direct Stream Mixing (dmix) Plugin Interface - I386 assembler code
5  * \author Jaroslav Kysela <perex@perex.cz>
6  * \date 2003
7  */
8 /*
9  *  PCM - Direct Stream Mixing
10  *  Copyright (c) 2003 by Jaroslav Kysela <perex@perex.cz>
11  *
12  *
13  *   This library is free software; you can redistribute it and/or modify
14  *   it under the terms of the GNU Lesser General Public License as
15  *   published by the Free Software Foundation; either version 2.1 of
16  *   the License, or (at your option) any later version.
17  *
18  *   This program is distributed in the hope that it will be useful,
19  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
20  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  *   GNU Lesser General Public License for more details.
22  *
23  *   You should have received a copy of the GNU Lesser General Public
24  *   License along with this library; if not, write to the Free Software
25  *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
26  *
27  */
28 
29 #if defined(__GNUC__) && __GNUC__ < 5 && defined(__PIC__)
30 #  define BOUNDED_EBX
31 #endif
32 
33 /*
34  *  16-bit version for plain i386
35  */
/*
 * MIX_AREAS_16 - mix signed 16-bit samples from src into dst through a
 * 32-bit sum buffer, written as one i386 inline-assembly statement.
 *
 *   size      number of samples to process; when it is 0 the loop is
 *             skipped and no sample memory is touched
 *   dst       16-bit output samples; a value of 0 is also tested and
 *             replaced by 1 with cmpxchg before the sample is mixed
 *   src       16-bit input samples (sign-extended to 32 bits on load)
 *   sum       32-bit accumulator, one entry per sample
 *   dst_step, src_step, sum_step
 *             added directly to the pointer registers between samples,
 *             so they are byte offsets
 *
 * The value finally stored in *dst is *sum saturated to [-0x8000, 0x7fff].
 *
 * NOTE(review): LOCK_PREFIX, XADD and XSUB are not defined in this file;
 * presumably the including file supplies them (lock prefix / add / sub
 * variants) - confirm there.
 */
MIX_AREAS_16(unsigned int size,volatile signed short * dst,signed short * src,volatile signed int * sum,size_t dst_step,size_t src_step,size_t sum_step)36 static void MIX_AREAS_16(unsigned int size,
37 			 volatile signed short *dst, signed short *src,
38 			 volatile signed int *sum, size_t dst_step,
39 			 size_t src_step, size_t sum_step)
40 {
41 #ifdef BOUNDED_EBX
42 	unsigned int old_ebx;
43 #endif
44 	/*
45 	 *  ESI - src pointer
46 	 *  EDI - dst pointer
47 	 *  EBX - sum pointer (saved/restored by hand when BOUNDED_EBX is set)
48 	 *  ECX - 1 for cmpxchg, then the new sample, then the old sample (*sum snapshot)
49 	 *  EAX - cmpxchg comparand (0)
50 	 *  EDX - sum_sample (value of *sum read before the cmpxchg)
51 	 */
52 	__asm__ __volatile__ (
53 		"\n"
54 #ifdef BOUNDED_EBX
55 		"\tmovl %%ebx, %[old_ebx]\n"	/* save ebx: it is the GOT pointer (-fPIC) */
56 #endif
57 		/*
58 		 *  initialization, load ESI, EDI, EBX registers; leave at once if size == 0
59 		 */
60 		"\tmovl %[dst], %%edi\n"
61 		"\tmovl %[src], %%esi\n"
62 		"\tmovl %[sum], %%ebx\n"
63 		"\tcmpl $0, %[size]\n"
64 		"\tjnz 2f\n"
65 		"\tjmp 7f\n"
66 
67 
68 		/*
69 		 * for (;;) - label 1 advances the pointers, the first pass enters at label 2
70 		 */
71 		"\t.p2align 4,,15\n"
72 		"1:"
73 		"\tadd %[dst_step], %%edi\n"
74 		"\tadd %[src_step], %%esi\n"
75 		"\tadd %[sum_step], %%ebx\n"
76 
77 		/*
78 		 *   sample = *src;
79 		 *   sum_sample = *sum;
80 		 *   if (cmpxchg(*dst, 0, 1) == 0)
81 		 *     sample -= sum_sample;
82 		 *   xadd(*sum, sample);
83 		 */
84 
85 		"2:"
86 		"\tmovw $0, %%ax\n"
87 		"\tmovw $1, %%cx\n"
88 		"\tmovl (%%ebx), %%edx\n"
89 		"\t" LOCK_PREFIX "cmpxchgw %%cx, (%%edi)\n"
		/* movswl does not change the flags, so jnz below still tests the cmpxchg result */
90 		"\tmovswl (%%esi), %%ecx\n"
91 		"\tjnz 3f\n"
92 		"\t" XSUB " %%edx, %%ecx\n"
93 		"3:"
94 		"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
95 
96 		/*
97 		 *   do {
98 		 *     sample = old_sample = *sum;
99 		 *     saturate(sample);
100 		 *     *dst = sample;
101 		 *   } while (old_sample != *sum);
102 		 */
103 
104 		"4:"
105 		"\tmovl (%%ebx), %%ecx\n"
106 		"\tcmpl $0x7fff,%%ecx\n"
107 		"\tjg 5f\n"
108 		"\tcmpl $-0x8000,%%ecx\n"
109 		"\tjl 6f\n"
110 		"\tmovw %%cx, (%%edi)\n"
111 		"\tcmpl %%ecx, (%%ebx)\n"
112 		"\tjnz 4b\n"
113 
114 		/*
115 		 * while (--size != 0)
116 		 */
117 		"\tdecl %[size]\n"
118 		"\tjnz 1b\n"
119 		"\tjmp 7f\n"
120 
121 		/*
122 		 *  sample > 0x7fff: store the upper limit, then re-check *sum and loop
123 		 */
124 
125 		"\t.p2align 4,,15\n"
126 
127 		"5:"
128 		"\tmovw $0x7fff, (%%edi)\n"
129 		"\tcmpl %%ecx,(%%ebx)\n"
130 		"\tjnz 4b\n"
131 		"\tdecl %[size]\n"
132 		"\tjnz 1b\n"
133 		"\tjmp 7f\n"
134 
135 		/*
136 		 *  sample < -0x8000: store the lower limit, then re-check *sum and loop
137 		 */
138 
139 		"\t.p2align 4,,15\n"
140 
141 		"6:"
142 		"\tmovw $-0x8000, (%%edi)\n"
143 		"\tcmpl %%ecx, (%%ebx)\n"
144 		"\tjnz 4b\n"
145 		"\tdecl %[size]\n"
146 		"\tjnz 1b\n"
147 
148 		"7:"
149 #ifdef BOUNDED_EBX
150 		"\tmovl %[old_ebx], %%ebx\n"	/* restore ebx: it is the GOT pointer (-fPIC) */
151 #endif
		/* size is decremented in place, hence a read-write operand */
152 		: [size] "+&rm" (size)
153 #ifdef BOUNDED_EBX
154 		  , [old_ebx] "=m" (old_ebx)
155 #endif
156 		: [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
157 		  [dst_step] "im" (dst_step),  [src_step] "im" (src_step),
158 		  [sum_step] "im" (sum_step)
159 		: "esi", "edi", "edx", "ecx", "eax", "memory", "cc"
		/* ebx is only declared clobbered when it is not saved/restored by hand above */
160 #ifndef BOUNDED_EBX
161 		  , "ebx"
162 #endif
163 	);
164 }
165 
166 /*
167  *  16-bit version, MMX optimized
168  */
/*
 * MIX_AREAS_16_MMX - mix signed 16-bit samples from src into dst through a
 * 32-bit sum buffer; same steps as the plain 16-bit routine, but the
 * saturation to 16 bits is done with the MMX packssdw instruction instead
 * of compare-and-branch.
 *
 *   size      number of samples to process; when it is 0 the loop (and
 *             the trailing emms) is skipped
 *   dst       16-bit output samples; a value of 0 is also tested and
 *             replaced by 1 with cmpxchg before the sample is mixed
 *   src       16-bit input samples (sign-extended to 32 bits on load)
 *   sum       32-bit accumulator, one entry per sample
 *   dst_step, src_step, sum_step
 *             added directly to the pointer registers between samples,
 *             so they are byte offsets
 *
 * NOTE(review): LOCK_PREFIX, XADD and XSUB are not defined in this file;
 * presumably the including file supplies them - confirm there.
 */
MIX_AREAS_16_MMX(unsigned int size,volatile signed short * dst,signed short * src,volatile signed int * sum,size_t dst_step,size_t src_step,size_t sum_step)169 static void MIX_AREAS_16_MMX(unsigned int size,
170 			     volatile signed short *dst, signed short *src,
171 			     volatile signed int *sum, size_t dst_step,
172 			     size_t src_step, size_t sum_step)
173 {
174 #ifdef BOUNDED_EBX
175 	unsigned int old_ebx;
176 #endif
177 	/*
178 	 *  ESI - src pointer
179 	 *  EDI - dst pointer
180 	 *  EBX - sum pointer (saved/restored by hand when BOUNDED_EBX is set)
181 	 *  ECX - 1 for cmpxchg, then the new sample, then the old sample (*sum snapshot)
182 	 *  EAX - cmpxchg comparand (0), later the saturated sample from MM0
183 	 *  EDX - sum_sample (value of *sum read before the cmpxchg)
184 	 */
185 	__asm__ __volatile__ (
186 		"\n"
187 #ifdef BOUNDED_EBX
188 		"\tmovl %%ebx, %[old_ebx]\n"	/* save ebx: it is the GOT pointer (-fPIC) */
189 #endif
190 		/*
191 		 *  initialization, load ESI, EDI, EBX registers; leave at once if size == 0
192 		 */
193 		"\tmovl %[dst], %%edi\n"
194 		"\tmovl %[src], %%esi\n"
195 		"\tmovl %[sum], %%ebx\n"
196 		"\tcmpl $0, %[size]\n"
197 		"\tjnz 2f\n"
198 		"\tjmp 5f\n"
199 
200 		"\t.p2align 4,,15\n"
201 		"1:"
202 		"\tadd %[dst_step], %%edi\n"
203 		"\tadd %[src_step], %%esi\n"
204 		"\tadd %[sum_step], %%ebx\n"
205 
206 		"2:"
207 		/*
208 		 *   sample = *src;
209 		 *   sum_sample = *sum;
210 		 *   if (cmpxchg(*dst, 0, 1) == 0)
211 		 *     sample -= sum_sample;
212 		 *   xadd(*sum, sample);
213 		 */
214 		"\tmovw $0, %%ax\n"
215 		"\tmovw $1, %%cx\n"
216 		"\tmovl (%%ebx), %%edx\n"
217 		"\t" LOCK_PREFIX "cmpxchgw %%cx, (%%edi)\n"
		/* movswl does not change the flags, so jnz below still tests the cmpxchg result */
218 		"\tmovswl (%%esi), %%ecx\n"
219 		"\tjnz 3f\n"
220 		"\t" XSUB " %%edx, %%ecx\n"
221 		"3:"
222 		"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
223 
224 		/*
225 		 *   do {
226 		 *     sample = old_sample = *sum;
227 		 *     saturate(sample);
228 		 *     *dst = sample;
229 		 *   } while (old_sample != *sum);
230 		 */
231 
232 		"4:"
233 		"\tmovl (%%ebx), %%ecx\n"
234 		"\tmovd %%ecx, %%mm0\n"
		/*
		 * packssdw packs with signed saturation; mm1 is read here but
		 * never written in this block - only the low 16 bits of the
		 * result, which come from mm0, are stored below.
		 */
235 		"\tpackssdw %%mm1, %%mm0\n"
236 		"\tmovd %%mm0, %%eax\n"
237 		"\tmovw %%ax, (%%edi)\n"
238 		"\tcmpl %%ecx, (%%ebx)\n"
239 		"\tjnz 4b\n"
240 
241 		/*
242 		 * while (--size != 0); emms is executed once the loop has finished
243 		 */
244 		"\tdecl %[size]\n"
245 		"\tjnz 1b\n"
246 		"\temms\n"
247                 "5:"
248 #ifdef BOUNDED_EBX
249 		"\tmovl %[old_ebx], %%ebx\n"	/* restore ebx: it is the GOT pointer (-fPIC) */
250 #endif
		/* size is decremented in place, hence a read-write operand */
251 		: [size] "+&rm" (size)
252 #ifdef BOUNDED_EBX
253 		  , [old_ebx] "=m" (old_ebx)
254 #endif
255 		: [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
256 		  [dst_step] "im" (dst_step),  [src_step] "im" (src_step),
257 		  [sum_step] "im" (sum_step)
258 		: "esi", "edi", "edx", "ecx", "eax", "memory", "cc"
		/* ebx is only declared clobbered when it is not saved/restored by hand above */
259 #ifndef BOUNDED_EBX
260 		  , "ebx"
261 #endif
		/*
		 * NOTE(review): without HAVE_MMX the x87 stack registers are
		 * listed instead of mm0 - presumably because the compiler then
		 * does not accept MMX register names; confirm.
		 */
262 #ifdef HAVE_MMX
263 		  , "mm0"
264 #else
265 		  , "st", "st(1)", "st(2)", "st(3)",
266 		  "st(4)", "st(5)", "st(6)", "st(7)"
267 #endif
268 	);
269 }
270 
271 /*
272  *  for plain i386, 32-bit version (24-bit resolution)
273  */
/*
 * MIX_AREAS_32 - mix signed 32-bit samples from src into dst through a
 * 32-bit sum buffer, working at 24-bit resolution (plain i386 inline
 * assembly).
 *
 * Each source sample is arithmetically shifted right by 8 before it is
 * accumulated; the accumulated value is saturated to
 * [-0x800000, 0x7fffff] and shifted left by 8 again before it is stored.
 *
 *   size      number of samples to process; when it is 0 the loop is
 *             skipped and no sample memory is touched
 *   dst       32-bit output samples; a value of 0 is also tested and
 *             replaced by 1 with cmpxchg before the sample is mixed
 *   src       32-bit input samples
 *   sum       32-bit accumulator, one entry per sample
 *   dst_step, src_step, sum_step
 *             added directly to the pointer registers between samples,
 *             so they are byte offsets
 *
 * NOTE(review): LOCK_PREFIX, XADD and XSUB are not defined in this file;
 * presumably the including file supplies them - confirm there.
 */
MIX_AREAS_32(unsigned int size,volatile signed int * dst,signed int * src,volatile signed int * sum,size_t dst_step,size_t src_step,size_t sum_step)274 static void MIX_AREAS_32(unsigned int size,
275 			 volatile signed int *dst, signed int *src,
276 			 volatile signed int *sum, size_t dst_step,
277 			 size_t src_step, size_t sum_step)
278 {
279 #ifdef BOUNDED_EBX
280 	unsigned int old_ebx;
281 #endif
282 	/*
283 	 *  ESI - src pointer
284 	 *  EDI - dst pointer
285 	 *  EBX - sum pointer (saved/restored by hand when BOUNDED_EBX is set)
286 	 *  ECX - 1 for cmpxchg, then the new sample, then the old sample (*sum snapshot)
287 	 *  EAX - cmpxchg comparand (0), later the saturated sample to store
288 	 *  EDX - sum_sample (value of *sum read before the cmpxchg)
289 	 */
290 	__asm__ __volatile__ (
291 		"\n"
292 #ifdef BOUNDED_EBX
293 		"\tmovl %%ebx, %[old_ebx]\n"	/* save ebx: it is the GOT pointer (-fPIC) */
294 #endif
295 		/*
296 		 *  initialization, load ESI, EDI, EBX registers; leave at once if size == 0
297 		 */
298 		"\tmovl %[dst], %%edi\n"
299 		"\tmovl %[src], %%esi\n"
300 		"\tmovl %[sum], %%ebx\n"
301 		"\tcmpl $0, %[size]\n"
302 		"\tjnz 1f\n"
303 		"\tjmp 6f\n"
304 
305 		"\t.p2align 4,,15\n"
306 
307 		"1:"
308 
309 		/*
310 		 *   sample = *src >> 8;
311 		 *   sum_sample = *sum;
312 		 *   if (cmpxchg(*dst, 0, 1) == 0)
313 		 *     sample -= sum_sample;
314 		 *   xadd(*sum, sample);
315 		 */
316 		"\tmovl $0, %%eax\n"
317 		"\tmovl $1, %%ecx\n"
318 		"\tmovl (%%ebx), %%edx\n"
319 		"\t" LOCK_PREFIX "cmpxchgl %%ecx, (%%edi)\n"
		/* sarl alters the flags, so the load and shift are done after the branch, once per path */
320 		"\tjnz 2f\n"
321 		"\tmovl (%%esi), %%ecx\n"
322 		/* sample >>= 8 */
323 		"\tsarl $8, %%ecx\n"
324 		"\t" XSUB " %%edx, %%ecx\n"
325 		"\tjmp 21f\n"
326 		"2:"
327 		"\tmovl (%%esi), %%ecx\n"
328 		/* sample >>= 8 */
329 		"\tsarl $8, %%ecx\n"
330 		"21:"
331 		"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
332 
333 		/*
334 		 *   do {
335 		 *     sample = old_sample = *sum;
336 		 *     saturate(sample);
337 		 *     *dst = sample << 8;
338 		 *   } while (old_sample != *sum);
339 		 */
340 
341 		"3:"
342 		"\tmovl (%%ebx), %%ecx\n"
343 		/*
344 		 *  if (sample > 0x7fffff) - EAX already holds the limit at label 4
345 		 */
346 		"\tmovl $0x7fffff, %%eax\n"
347 		"\tcmpl %%eax, %%ecx\n"
348 		"\tjg 4f\n"
349 		/*
350 		 *  if (sample < -0x800000) - EAX already holds the limit at label 4
351 		 */
352 		"\tmovl $-0x800000, %%eax\n"
353 		"\tcmpl %%eax, %%ecx\n"
354 		"\tjl 4f\n"
355 		"\tmovl %%ecx, %%eax\n"
356 		"4:"
357 		/*
358 		 *  sample <<= 8;
359 		 */
360 		"\tsall $8, %%eax\n"
361 		"\tmovl %%eax, (%%edi)\n"
362 		"\tcmpl %%ecx, (%%ebx)\n"
363 		"\tjnz 3b\n"
364 
365 		/*
366 		 * while (--size != 0) - the pointers advance only if another sample follows
367 		 */
368 		"\tdecl %[size]\n"
369 		"\tjz 6f\n"
370 		"\tadd %[dst_step], %%edi\n"
371 		"\tadd %[src_step], %%esi\n"
372 		"\tadd %[sum_step], %%ebx\n"
373 		"\tjmp 1b\n"
374 
375 		"6:"
376 #ifdef BOUNDED_EBX
377 		"\tmovl %[old_ebx], %%ebx\n"	/* restore ebx: it is the GOT pointer (-fPIC) */
378 #endif
		/* size is decremented in place, hence a read-write operand */
379 		: [size] "+&rm" (size)
380 #ifdef BOUNDED_EBX
381 		  , [old_ebx] "=m" (old_ebx)
382 #endif
383 		: [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
384 		  [dst_step] "im" (dst_step),  [src_step] "im" (src_step),
385 		  [sum_step] "im" (sum_step)
386 		: "esi", "edi", "edx", "ecx", "eax", "memory", "cc"
		/* ebx is only declared clobbered when it is not saved/restored by hand above */
387 #ifndef BOUNDED_EBX
388 		  , "ebx"
389 #endif
390 	);
391 }
392 
393 /*
394  * 24-bit version for plain i386
395  */
/*
 * MIX_AREAS_24 - mix packed 3-byte samples from src into dst through a
 * 32-bit sum buffer (plain i386 inline assembly).
 *
 * A source sample is assembled from a zero-extended 16-bit load at
 * offset 0 and a sign-extended byte load at offset 2.  Bit 0 of the
 * first dst word is tested and set with bts before mixing; the stored
 * result always has bit 0 set, and is written as a 16-bit store at
 * offset 0 followed by a byte store at offset 2.
 *
 *   size      number of samples to process; when it is 0 the loop is
 *             skipped and no sample memory is touched
 *   dst       3-byte output samples
 *   src       3-byte input samples
 *   sum       32-bit accumulator, one entry per sample
 *   dst_step, src_step, sum_step
 *             added directly to the pointer registers between samples,
 *             so they are byte offsets
 *
 * NOTE(review): LOCK_PREFIX, XADD and XSUB are not defined in this file;
 * presumably the including file supplies them - confirm there.
 */
MIX_AREAS_24(unsigned int size,volatile unsigned char * dst,unsigned char * src,volatile signed int * sum,size_t dst_step,size_t src_step,size_t sum_step)396 static void MIX_AREAS_24(unsigned int size,
397 			 volatile unsigned char *dst, unsigned char *src,
398 			 volatile signed int *sum, size_t dst_step,
399 			 size_t src_step, size_t sum_step)
400 {
401 #ifdef BOUNDED_EBX
402 	unsigned int old_ebx;
403 #endif
404 	/*
405 	 *  ESI - src pointer
406 	 *  EDI - dst pointer
407 	 *  EBX - sum pointer (saved/restored by hand when BOUNDED_EBX is set)
408 	 *  ECX - the new sample, then the old sample (*sum snapshot)
409 	 *  EAX - high byte of the sample, later the saturated sample to store
410 	 *  EDX - sum_sample (value of *sum read before the bts)
411 	 */
412 	__asm__ __volatile__ (
413 		"\n"
414 #ifdef BOUNDED_EBX
415 		"\tmovl %%ebx, %[old_ebx]\n"	/* save ebx: it is the GOT pointer (-fPIC) */
416 #endif
417 		/*
418 		 *  initialization, load ESI, EDI, EBX registers; leave at once if size == 0
419 		 */
420 		"\tmovl %[dst], %%edi\n"
421 		"\tmovl %[src], %%esi\n"
422 		"\tmovl %[sum], %%ebx\n"
423 		"\tcmpl $0, %[size]\n"
424 		"\tjnz 1f\n"
425 		"\tjmp 6f\n"
426 
427 		"\t.p2align 4,,15\n"
428 
429 		"1:"
430 
431 		/*
432 		 *   sample = *src;
433 		 *   sum_sample = *sum;
434 		 *   if (test_and_set_bit(0, dst) == 0)
435 		 *     sample -= sum_sample;
436 		 *   xadd(*sum, sample);
437 		 */
438 		"\tmovsbl 2(%%esi), %%eax\n"
439 		"\tmovzwl (%%esi), %%ecx\n"
440 		"\tmovl (%%ebx), %%edx\n"
441 		"\tsall $16, %%eax\n"
442 		"\torl %%eax, %%ecx\n"
443 		"\t" LOCK_PREFIX "btsw $0, (%%edi)\n"
		/* carry set: bit 0 was already set, so the sum is kept as it is */
444 		"\tjc 2f\n"
445 		"\t" XSUB " %%edx, %%ecx\n"
446 		"2:"
447 		"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
448 
449 		/*
450 		 *   do {
451 		 *     sample = old_sample = *sum;
452 		 *     saturate(sample);
453 		 *     *dst = sample | 1;
454 		 *   } while (old_sample != *sum);
455 		 */
456 
457 		"3:"
458 		"\tmovl (%%ebx), %%ecx\n"
459 		/*
460 		 *  if (sample > 0x7fffff)
461 		 */
462 		"\tmovl $0x7fffff, %%eax\n"
463 		"\tcmpl %%eax, %%ecx\n"
464 		"\tjg 4f\n"
465 		/*
466 		 *  if (sample < -0x7fffff)
467 		 */
468 		"\tmovl $-0x7fffff, %%eax\n"
469 		"\tcmpl %%eax, %%ecx\n"
470 		"\tjl 4f\n"
471 		"\tmovl %%ecx, %%eax\n"
		/* both limits are odd, so only the unsaturated value needs bit 0 forced on */
472 		"\torl $1, %%eax\n"
473 		"4:"
474 		"\tmovw %%ax, (%%edi)\n"
475 		"\tshrl $16, %%eax\n"
476 		"\tmovb %%al, 2(%%edi)\n"
477 		"\tcmpl %%ecx, (%%ebx)\n"
478 		"\tjnz 3b\n"
479 
480 		/*
481 		 * while (--size != 0) - the pointers advance only if another sample follows
482 		 */
483 		"\tdecl %[size]\n"
484 		"\tjz 6f\n"
485 		"\tadd %[dst_step], %%edi\n"
486 		"\tadd %[src_step], %%esi\n"
487 		"\tadd %[sum_step], %%ebx\n"
488 		"\tjmp 1b\n"
489 
490 		"6:"
491 #ifdef BOUNDED_EBX
492 		"\tmovl %[old_ebx], %%ebx\n"	/* restore ebx: it is the GOT pointer (-fPIC) */
493 #endif
		/* size is decremented in place, hence a read-write operand */
494 		: [size] "+&rm" (size)
495 #ifdef BOUNDED_EBX
496 		  , [old_ebx] "=m" (old_ebx)
497 #endif
498 		: [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
499 		  [dst_step] "im" (dst_step),  [src_step] "im" (src_step),
500 		  [sum_step] "im" (sum_step)
501 		: "esi", "edi", "edx", "ecx", "eax", "memory", "cc"
		/* ebx is only declared clobbered when it is not saved/restored by hand above */
502 #ifndef BOUNDED_EBX
503 		  , "ebx"
504 #endif
505 	);
506 }
507 
508 /*
509  * 24-bit version for Pentium Pro/II
510  */
/*
 * MIX_AREAS_24_CMOV - mix packed 3-byte samples from src into dst through
 * a 32-bit sum buffer; same steps as the plain 24-bit routine, but the
 * saturation to [-0x7fffff, 0x7fffff] uses conditional moves instead of
 * branches.
 *
 * A source sample is assembled from a zero-extended 16-bit load at
 * offset 0 and a sign-extended byte load at offset 2.  Bit 0 of the
 * first dst word is tested and set with bts before mixing; the stored
 * result always has bit 0 set, and is written as a 16-bit store at
 * offset 0 followed by a byte store at offset 2.
 *
 *   size      number of samples to process; when it is 0 the loop is
 *             skipped and no sample memory is touched
 *   dst       3-byte output samples
 *   src       3-byte input samples
 *   sum       32-bit accumulator, one entry per sample
 *   dst_step, src_step, sum_step
 *             added directly to the pointer registers after every
 *             sample, so they are byte offsets
 *
 * NOTE(review): LOCK_PREFIX, XADD and XSUB are not defined in this file;
 * presumably the including file supplies them - confirm there.
 */
MIX_AREAS_24_CMOV(unsigned int size,volatile unsigned char * dst,unsigned char * src,volatile signed int * sum,size_t dst_step,size_t src_step,size_t sum_step)511 static void MIX_AREAS_24_CMOV(unsigned int size,
512 			      volatile unsigned char *dst, unsigned char *src,
513 			      volatile signed int *sum, size_t dst_step,
514 			      size_t src_step, size_t sum_step)
515 {
516 #ifdef BOUNDED_EBX
517 	unsigned int old_ebx;
518 #endif
519 	/*
520 	 *  ESI - src pointer
521 	 *  EDI - dst pointer
522 	 *  EBX - sum pointer (saved/restored by hand when BOUNDED_EBX is set)
523 	 *  ECX - the new sample, then the old sample (*sum snapshot)
524 	 *  EAX - high byte of the sample, later the saturated sample to store
525 	 *  EDX - sum_sample, later the lower saturation limit
526 	 */
527 	__asm__ __volatile__ (
528 		"\n"
529 #ifdef BOUNDED_EBX
530 		"\tmovl %%ebx, %[old_ebx]\n"	/* save ebx: it is the GOT pointer (-fPIC) */
531 #endif
532 		/*
533 		 *  initialization, load ESI, EDI, EBX registers; leave at once if size == 0
534 		 */
535 		"\tmovl %[dst], %%edi\n"
536 		"\tmovl %[src], %%esi\n"
537 		"\tmovl %[sum], %%ebx\n"
538 		"\tcmpl $0, %[size]\n"
539 		"\tjz 6f\n"
540 
541 		"\t.p2align 4,,15\n"
542 
543 		"1:"
544 
545 		/*
546 		 *   sample = *src;
547 		 *   sum_sample = *sum;
548 		 *   if (test_and_set_bit(0, dst) == 0)
549 		 *     sample -= sum_sample;
550 		 *   xadd(*sum, sample);
551 		 */
552 		"\tmovsbl 2(%%esi), %%eax\n"
553 		"\tmovzwl (%%esi), %%ecx\n"
554 		"\tmovl (%%ebx), %%edx\n"
555 		"\tsall $16, %%eax\n"
556 		"\t" LOCK_PREFIX "btsw $0, (%%edi)\n"
		/* leal combines the two halves without touching the flags, so jc still tests the bts result */
557 		"\tleal (%%ecx,%%eax,1), %%ecx\n"
558 		"\tjc 2f\n"
559 		"\t" XSUB " %%edx, %%ecx\n"
560 		"2:"
561 		"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
562 
563 		/*
564 		 *   do {
565 		 *     sample = old_sample = *sum;
566 		 *     saturate(sample);
567 		 *     *dst = sample | 1;
568 		 *   } while (old_sample != *sum);
569 		 */
570 
571 		"3:"
572 		"\tmovl (%%ebx), %%ecx\n"
573 
		/* EAX = min(sample, 0x7fffff), then replaced by -0x7fffff if sample is below that */
574 		"\tmovl $0x7fffff, %%eax\n"
575 		"\tmovl $-0x7fffff, %%edx\n"
576 		"\tcmpl %%eax, %%ecx\n"
577 		"\tcmovng %%ecx, %%eax\n"
578 		"\tcmpl %%edx, %%ecx\n"
579 		"\tcmovl %%edx, %%eax\n"
580 
581 		"\torl $1, %%eax\n"
582 		"\tmovw %%ax, (%%edi)\n"
583 		"\tshrl $16, %%eax\n"
584 		"\tmovb %%al, 2(%%edi)\n"
585 
586 		"\tcmpl %%ecx, (%%ebx)\n"
587 		"\tjnz 3b\n"
588 
589 		/*
590 		 * while (--size != 0) - the pointers are advanced before the test
591 		 */
592 		"\tadd %[dst_step], %%edi\n"
593 		"\tadd %[src_step], %%esi\n"
594 		"\tadd %[sum_step], %%ebx\n"
595 		"\tdecl %[size]\n"
596 		"\tjnz 1b\n"
597 
598 		"6:"
599 #ifdef BOUNDED_EBX
600 		"\tmovl %[old_ebx], %%ebx\n"	/* restore ebx: it is the GOT pointer (-fPIC) */
601 #endif
		/* size is decremented in place, hence a read-write operand */
602 		: [size] "+&rm" (size)
603 #ifdef BOUNDED_EBX
604 		  , [old_ebx] "=m" (old_ebx)
605 #endif
606 		: [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
607 		  [dst_step] "im" (dst_step),  [src_step] "im" (src_step),
608 		  [sum_step] "im" (sum_step)
609 		: "esi", "edi", "edx", "ecx", "eax", "memory", "cc"
		/* ebx is only declared clobbered when it is not saved/restored by hand above */
610 #ifndef BOUNDED_EBX
611 		  , "ebx"
612 #endif
613 	);
614 }
615 
616 #ifdef BOUNDED_EBX
617 #  undef BOUNDED_EBX
618 #endif
619