/**
 * \file pcm/pcm_dmix_i386.h
 * \ingroup PCM_Plugins
 * \brief PCM Direct Stream Mixing (dmix) Plugin Interface - I386 assembler code
 * \author Jaroslav Kysela <perex@perex.cz>
 * \date 2003
 */
/*
 * PCM - Direct Stream Mixing
 * Copyright (c) 2003 by Jaroslav Kysela <perex@perex.cz>
 *
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */
28
/*
 * BOUNDED_EBX selects how the inline assembly in this file treats EBX.
 *
 * When it is defined (GCC older than 5 building position-independent code,
 * where EBX holds the GOT pointer), every routine saves EBX to a local
 * before using it and restores it afterwards, and EBX is left out of the
 * clobber list.  Otherwise EBX is simply declared as clobbered.
 * NOTE(review): presumably those GCC versions refuse an "ebx" clobber in
 * PIC code - confirm against the GCC release notes.
 */
#if defined(__GNUC__) && __GNUC__ < 5 && defined(__PIC__)
#  define BOUNDED_EBX
#endif
32
33 /*
34 * for plain i386
35 */
MIX_AREAS_16(unsigned int size,volatile signed short * dst,signed short * src,volatile signed int * sum,size_t dst_step,size_t src_step,size_t sum_step)36 static void MIX_AREAS_16(unsigned int size,
37 volatile signed short *dst, signed short *src,
38 volatile signed int *sum, size_t dst_step,
39 size_t src_step, size_t sum_step)
40 {
41 #ifdef BOUNDED_EBX
42 unsigned int old_ebx;
43 #endif
44 /*
45 * ESI - src
46 * EDI - dst
47 * EBX - sum
48 * ECX - old sample
49 * EAX - sample / temporary
50 * EDX - temporary
51 */
52 __asm__ __volatile__ (
53 "\n"
54 #ifdef BOUNDED_EBX
55 "\tmovl %%ebx, %[old_ebx]\n" /* ebx is GOT pointer (-fPIC) */
56 #endif
57 /*
58 * initialization, load ESI, EDI, EBX registers
59 */
60 "\tmovl %[dst], %%edi\n"
61 "\tmovl %[src], %%esi\n"
62 "\tmovl %[sum], %%ebx\n"
63 "\tcmpl $0, %[size]\n"
64 "\tjnz 2f\n"
65 "\tjmp 7f\n"
66
67
68 /*
69 * for (;;)
70 */
71 "\t.p2align 4,,15\n"
72 "1:"
73 "\tadd %[dst_step], %%edi\n"
74 "\tadd %[src_step], %%esi\n"
75 "\tadd %[sum_step], %%ebx\n"
76
77 /*
78 * sample = *src;
79 * sum_sample = *sum;
80 * if (cmpxchg(*dst, 0, 1) == 0)
81 * sample -= sum_sample;
82 * xadd(*sum, sample);
83 */
84
85 "2:"
86 "\tmovw $0, %%ax\n"
87 "\tmovw $1, %%cx\n"
88 "\tmovl (%%ebx), %%edx\n"
89 "\t" LOCK_PREFIX "cmpxchgw %%cx, (%%edi)\n"
90 "\tmovswl (%%esi), %%ecx\n"
91 "\tjnz 3f\n"
92 "\t" XSUB " %%edx, %%ecx\n"
93 "3:"
94 "\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
95
96 /*
97 * do {
98 * sample = old_sample = *sum;
99 * saturate(v);
100 * *dst = sample;
101 * } while (v != *sum);
102 */
103
104 "4:"
105 "\tmovl (%%ebx), %%ecx\n"
106 "\tcmpl $0x7fff,%%ecx\n"
107 "\tjg 5f\n"
108 "\tcmpl $-0x8000,%%ecx\n"
109 "\tjl 6f\n"
110 "\tmovw %%cx, (%%edi)\n"
111 "\tcmpl %%ecx, (%%ebx)\n"
112 "\tjnz 4b\n"
113
114 /*
115 * while (size-- > 0)
116 */
117 "\tdecl %[size]\n"
118 "\tjnz 1b\n"
119 "\tjmp 7f\n"
120
121 /*
122 * sample > 0x7fff
123 */
124
125 "\t.p2align 4,,15\n"
126
127 "5:"
128 "\tmovw $0x7fff, (%%edi)\n"
129 "\tcmpl %%ecx,(%%ebx)\n"
130 "\tjnz 4b\n"
131 "\tdecl %[size]\n"
132 "\tjnz 1b\n"
133 "\tjmp 7f\n"
134
135 /*
136 * sample < -0x8000
137 */
138
139 "\t.p2align 4,,15\n"
140
141 "6:"
142 "\tmovw $-0x8000, (%%edi)\n"
143 "\tcmpl %%ecx, (%%ebx)\n"
144 "\tjnz 4b\n"
145 "\tdecl %[size]\n"
146 "\tjnz 1b\n"
147
148 "7:"
149 #ifdef BOUNDED_EBX
150 "\tmovl %[old_ebx], %%ebx\n" /* ebx is GOT pointer (-fPIC) */
151 #endif
152 : [size] "+&rm" (size)
153 #ifdef BOUNDED_EBX
154 , [old_ebx] "=m" (old_ebx)
155 #endif
156 : [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
157 [dst_step] "im" (dst_step), [src_step] "im" (src_step),
158 [sum_step] "im" (sum_step)
159 : "esi", "edi", "edx", "ecx", "eax", "memory", "cc"
160 #ifndef BOUNDED_EBX
161 , "ebx"
162 #endif
163 );
164 }
165
166 /*
167 * MMX optimized
168 */
MIX_AREAS_16_MMX(unsigned int size,volatile signed short * dst,signed short * src,volatile signed int * sum,size_t dst_step,size_t src_step,size_t sum_step)169 static void MIX_AREAS_16_MMX(unsigned int size,
170 volatile signed short *dst, signed short *src,
171 volatile signed int *sum, size_t dst_step,
172 size_t src_step, size_t sum_step)
173 {
174 #ifdef BOUNDED_EBX
175 unsigned int old_ebx;
176 #endif
177 /*
178 * ESI - src
179 * EDI - dst
180 * EBX - sum
181 * ECX - old sample
182 * EAX - sample / temporary
183 * EDX - temporary
184 */
185 __asm__ __volatile__ (
186 "\n"
187 #ifdef BOUNDED_EBX
188 "\tmovl %%ebx, %[old_ebx]\n" /* ebx is GOT pointer (-fPIC) */
189 #endif
190 /*
191 * initialization, load ESI, EDI, EBX registers
192 */
193 "\tmovl %[dst], %%edi\n"
194 "\tmovl %[src], %%esi\n"
195 "\tmovl %[sum], %%ebx\n"
196 "\tcmpl $0, %[size]\n"
197 "\tjnz 2f\n"
198 "\tjmp 5f\n"
199
200 "\t.p2align 4,,15\n"
201 "1:"
202 "\tadd %[dst_step], %%edi\n"
203 "\tadd %[src_step], %%esi\n"
204 "\tadd %[sum_step], %%ebx\n"
205
206 "2:"
207 /*
208 * sample = *src;
209 * sum_sample = *sum;
210 * if (cmpxchg(*dst, 0, 1) == 0)
211 * sample -= sum_sample;
212 * xadd(*sum, sample);
213 */
214 "\tmovw $0, %%ax\n"
215 "\tmovw $1, %%cx\n"
216 "\tmovl (%%ebx), %%edx\n"
217 "\t" LOCK_PREFIX "cmpxchgw %%cx, (%%edi)\n"
218 "\tmovswl (%%esi), %%ecx\n"
219 "\tjnz 3f\n"
220 "\t" XSUB " %%edx, %%ecx\n"
221 "3:"
222 "\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
223
224 /*
225 * do {
226 * sample = old_sample = *sum;
227 * saturate(v);
228 * *dst = sample;
229 * } while (v != *sum);
230 */
231
232 "4:"
233 "\tmovl (%%ebx), %%ecx\n"
234 "\tmovd %%ecx, %%mm0\n"
235 "\tpackssdw %%mm1, %%mm0\n"
236 "\tmovd %%mm0, %%eax\n"
237 "\tmovw %%ax, (%%edi)\n"
238 "\tcmpl %%ecx, (%%ebx)\n"
239 "\tjnz 4b\n"
240
241 /*
242 * while (size-- > 0)
243 */
244 "\tdecl %[size]\n"
245 "\tjnz 1b\n"
246 "\temms\n"
247 "5:"
248 #ifdef BOUNDED_EBX
249 "\tmovl %[old_ebx], %%ebx\n" /* ebx is GOT pointer (-fPIC) */
250 #endif
251 : [size] "+&rm" (size)
252 #ifdef BOUNDED_EBX
253 , [old_ebx] "=m" (old_ebx)
254 #endif
255 : [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
256 [dst_step] "im" (dst_step), [src_step] "im" (src_step),
257 [sum_step] "im" (sum_step)
258 : "esi", "edi", "edx", "ecx", "eax", "memory", "cc"
259 #ifndef BOUNDED_EBX
260 , "ebx"
261 #endif
262 #ifdef HAVE_MMX
263 , "mm0"
264 #else
265 , "st", "st(1)", "st(2)", "st(3)",
266 "st(4)", "st(5)", "st(6)", "st(7)"
267 #endif
268 );
269 }
270
271 /*
272 * for plain i386, 32-bit version (24-bit resolution)
273 */
MIX_AREAS_32(unsigned int size,volatile signed int * dst,signed int * src,volatile signed int * sum,size_t dst_step,size_t src_step,size_t sum_step)274 static void MIX_AREAS_32(unsigned int size,
275 volatile signed int *dst, signed int *src,
276 volatile signed int *sum, size_t dst_step,
277 size_t src_step, size_t sum_step)
278 {
279 #ifdef BOUNDED_EBX
280 unsigned int old_ebx;
281 #endif
282 /*
283 * ESI - src
284 * EDI - dst
285 * EBX - sum
286 * ECX - old sample
287 * EAX - sample / temporary
288 * EDX - temporary
289 */
290 __asm__ __volatile__ (
291 "\n"
292 #ifdef BOUNDED_EBX
293 "\tmovl %%ebx, %[old_ebx]\n" /* ebx is GOT pointer (-fPIC) */
294 #endif
295 /*
296 * initialization, load ESI, EDI, EBX registers
297 */
298 "\tmovl %[dst], %%edi\n"
299 "\tmovl %[src], %%esi\n"
300 "\tmovl %[sum], %%ebx\n"
301 "\tcmpl $0, %[size]\n"
302 "\tjnz 1f\n"
303 "\tjmp 6f\n"
304
305 "\t.p2align 4,,15\n"
306
307 "1:"
308
309 /*
310 * sample = *src;
311 * sum_sample = *sum;
312 * if (cmpxchg(*dst, 0, 1) == 0)
313 * sample -= sum_sample;
314 * xadd(*sum, sample);
315 */
316 "\tmovl $0, %%eax\n"
317 "\tmovl $1, %%ecx\n"
318 "\tmovl (%%ebx), %%edx\n"
319 "\t" LOCK_PREFIX "cmpxchgl %%ecx, (%%edi)\n"
320 "\tjnz 2f\n"
321 "\tmovl (%%esi), %%ecx\n"
322 /* sample >>= 8 */
323 "\tsarl $8, %%ecx\n"
324 "\t" XSUB " %%edx, %%ecx\n"
325 "\tjmp 21f\n"
326 "2:"
327 "\tmovl (%%esi), %%ecx\n"
328 /* sample >>= 8 */
329 "\tsarl $8, %%ecx\n"
330 "21:"
331 "\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
332
333 /*
334 * do {
335 * sample = old_sample = *sum;
336 * saturate(v);
337 * *dst = sample;
338 * } while (v != *sum);
339 */
340
341 "3:"
342 "\tmovl (%%ebx), %%ecx\n"
343 /*
344 * if (sample > 0x7fff00)
345 */
346 "\tmovl $0x7fffff, %%eax\n"
347 "\tcmpl %%eax, %%ecx\n"
348 "\tjg 4f\n"
349 /*
350 * if (sample < -0x800000)
351 */
352 "\tmovl $-0x800000, %%eax\n"
353 "\tcmpl %%eax, %%ecx\n"
354 "\tjl 4f\n"
355 "\tmovl %%ecx, %%eax\n"
356 "4:"
357 /*
358 * sample <<= 8;
359 */
360 "\tsall $8, %%eax\n"
361 "\tmovl %%eax, (%%edi)\n"
362 "\tcmpl %%ecx, (%%ebx)\n"
363 "\tjnz 3b\n"
364
365 /*
366 * while (size-- > 0)
367 */
368 "\tdecl %[size]\n"
369 "\tjz 6f\n"
370 "\tadd %[dst_step], %%edi\n"
371 "\tadd %[src_step], %%esi\n"
372 "\tadd %[sum_step], %%ebx\n"
373 "\tjmp 1b\n"
374
375 "6:"
376 #ifdef BOUNDED_EBX
377 "\tmovl %[old_ebx], %%ebx\n" /* ebx is GOT pointer (-fPIC) */
378 #endif
379 : [size] "+&rm" (size)
380 #ifdef BOUNDED_EBX
381 , [old_ebx] "=m" (old_ebx)
382 #endif
383 : [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
384 [dst_step] "im" (dst_step), [src_step] "im" (src_step),
385 [sum_step] "im" (sum_step)
386 : "esi", "edi", "edx", "ecx", "eax", "memory", "cc"
387 #ifndef BOUNDED_EBX
388 , "ebx"
389 #endif
390 );
391 }
392
393 /*
394 * 24-bit version for plain i386
395 */
MIX_AREAS_24(unsigned int size,volatile unsigned char * dst,unsigned char * src,volatile signed int * sum,size_t dst_step,size_t src_step,size_t sum_step)396 static void MIX_AREAS_24(unsigned int size,
397 volatile unsigned char *dst, unsigned char *src,
398 volatile signed int *sum, size_t dst_step,
399 size_t src_step, size_t sum_step)
400 {
401 #ifdef BOUNDED_EBX
402 unsigned int old_ebx;
403 #endif
404 /*
405 * ESI - src
406 * EDI - dst
407 * EBX - sum
408 * ECX - old sample
409 * EAX - sample / temporary
410 * EDX - temporary
411 */
412 __asm__ __volatile__ (
413 "\n"
414 #ifdef BOUNDED_EBX
415 "\tmovl %%ebx, %[old_ebx]\n" /* ebx is GOT pointer (-fPIC) */
416 #endif
417 /*
418 * initialization, load ESI, EDI, EBX registers
419 */
420 "\tmovl %[dst], %%edi\n"
421 "\tmovl %[src], %%esi\n"
422 "\tmovl %[sum], %%ebx\n"
423 "\tcmpl $0, %[size]\n"
424 "\tjnz 1f\n"
425 "\tjmp 6f\n"
426
427 "\t.p2align 4,,15\n"
428
429 "1:"
430
431 /*
432 * sample = *src;
433 * sum_sample = *sum;
434 * if (test_and_set_bit(0, dst) == 0)
435 * sample -= sum_sample;
436 * *sum += sample;
437 */
438 "\tmovsbl 2(%%esi), %%eax\n"
439 "\tmovzwl (%%esi), %%ecx\n"
440 "\tmovl (%%ebx), %%edx\n"
441 "\tsall $16, %%eax\n"
442 "\torl %%eax, %%ecx\n"
443 "\t" LOCK_PREFIX "btsw $0, (%%edi)\n"
444 "\tjc 2f\n"
445 "\t" XSUB " %%edx, %%ecx\n"
446 "2:"
447 "\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
448
449 /*
450 * do {
451 * sample = old_sample = *sum;
452 * saturate(sample);
453 * *dst = sample | 1;
454 * } while (old_sample != *sum);
455 */
456
457 "3:"
458 "\tmovl (%%ebx), %%ecx\n"
459 /*
460 * if (sample > 0x7fffff)
461 */
462 "\tmovl $0x7fffff, %%eax\n"
463 "\tcmpl %%eax, %%ecx\n"
464 "\tjg 4f\n"
465 /*
466 * if (sample < -0x7fffff)
467 */
468 "\tmovl $-0x7fffff, %%eax\n"
469 "\tcmpl %%eax, %%ecx\n"
470 "\tjl 4f\n"
471 "\tmovl %%ecx, %%eax\n"
472 "\torl $1, %%eax\n"
473 "4:"
474 "\tmovw %%ax, (%%edi)\n"
475 "\tshrl $16, %%eax\n"
476 "\tmovb %%al, 2(%%edi)\n"
477 "\tcmpl %%ecx, (%%ebx)\n"
478 "\tjnz 3b\n"
479
480 /*
481 * while (size-- > 0)
482 */
483 "\tdecl %[size]\n"
484 "\tjz 6f\n"
485 "\tadd %[dst_step], %%edi\n"
486 "\tadd %[src_step], %%esi\n"
487 "\tadd %[sum_step], %%ebx\n"
488 "\tjmp 1b\n"
489
490 "6:"
491 #ifdef BOUNDED_EBX
492 "\tmovl %[old_ebx], %%ebx\n" /* ebx is GOT pointer (-fPIC) */
493 #endif
494 : [size] "+&rm" (size)
495 #ifdef BOUNDED_EBX
496 , [old_ebx] "=m" (old_ebx)
497 #endif
498 : [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
499 [dst_step] "im" (dst_step), [src_step] "im" (src_step),
500 [sum_step] "im" (sum_step)
501 : "esi", "edi", "edx", "ecx", "eax", "memory", "cc"
502 #ifndef BOUNDED_EBX
503 , "ebx"
504 #endif
505 );
506 }
507
508 /*
509 * 24-bit version for Pentium Pro/II
510 */
MIX_AREAS_24_CMOV(unsigned int size,volatile unsigned char * dst,unsigned char * src,volatile signed int * sum,size_t dst_step,size_t src_step,size_t sum_step)511 static void MIX_AREAS_24_CMOV(unsigned int size,
512 volatile unsigned char *dst, unsigned char *src,
513 volatile signed int *sum, size_t dst_step,
514 size_t src_step, size_t sum_step)
515 {
516 #ifdef BOUNDED_EBX
517 unsigned int old_ebx;
518 #endif
519 /*
520 * ESI - src
521 * EDI - dst
522 * EBX - sum
523 * ECX - old sample
524 * EAX - sample / temporary
525 * EDX - temporary
526 */
527 __asm__ __volatile__ (
528 "\n"
529 #ifdef BOUNDED_EBX
530 "\tmovl %%ebx, %[old_ebx]\n" /* ebx is GOT pointer (-fPIC) */
531 #endif
532 /*
533 * initialization, load ESI, EDI, EBX registers
534 */
535 "\tmovl %[dst], %%edi\n"
536 "\tmovl %[src], %%esi\n"
537 "\tmovl %[sum], %%ebx\n"
538 "\tcmpl $0, %[size]\n"
539 "\tjz 6f\n"
540
541 "\t.p2align 4,,15\n"
542
543 "1:"
544
545 /*
546 * sample = *src;
547 * sum_sample = *sum;
548 * if (test_and_set_bit(0, dst) == 0)
549 * sample -= sum_sample;
550 * *sum += sample;
551 */
552 "\tmovsbl 2(%%esi), %%eax\n"
553 "\tmovzwl (%%esi), %%ecx\n"
554 "\tmovl (%%ebx), %%edx\n"
555 "\tsall $16, %%eax\n"
556 "\t" LOCK_PREFIX "btsw $0, (%%edi)\n"
557 "\tleal (%%ecx,%%eax,1), %%ecx\n"
558 "\tjc 2f\n"
559 "\t" XSUB " %%edx, %%ecx\n"
560 "2:"
561 "\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
562
563 /*
564 * do {
565 * sample = old_sample = *sum;
566 * saturate(sample);
567 * *dst = sample | 1;
568 * } while (old_sample != *sum);
569 */
570
571 "3:"
572 "\tmovl (%%ebx), %%ecx\n"
573
574 "\tmovl $0x7fffff, %%eax\n"
575 "\tmovl $-0x7fffff, %%edx\n"
576 "\tcmpl %%eax, %%ecx\n"
577 "\tcmovng %%ecx, %%eax\n"
578 "\tcmpl %%edx, %%ecx\n"
579 "\tcmovl %%edx, %%eax\n"
580
581 "\torl $1, %%eax\n"
582 "\tmovw %%ax, (%%edi)\n"
583 "\tshrl $16, %%eax\n"
584 "\tmovb %%al, 2(%%edi)\n"
585
586 "\tcmpl %%ecx, (%%ebx)\n"
587 "\tjnz 3b\n"
588
589 /*
590 * while (size-- > 0)
591 */
592 "\tadd %[dst_step], %%edi\n"
593 "\tadd %[src_step], %%esi\n"
594 "\tadd %[sum_step], %%ebx\n"
595 "\tdecl %[size]\n"
596 "\tjnz 1b\n"
597
598 "6:"
599 #ifdef BOUNDED_EBX
600 "\tmovl %[old_ebx], %%ebx\n" /* ebx is GOT pointer (-fPIC) */
601 #endif
602 : [size] "+&rm" (size)
603 #ifdef BOUNDED_EBX
604 , [old_ebx] "=m" (old_ebx)
605 #endif
606 : [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
607 [dst_step] "im" (dst_step), [src_step] "im" (src_step),
608 [sum_step] "im" (sum_step)
609 : "esi", "edi", "edx", "ecx", "eax", "memory", "cc"
610 #ifndef BOUNDED_EBX
611 , "ebx"
612 #endif
613 );
614 }
615
/*
 * Keep BOUNDED_EBX private to this header.  No #ifdef guard is needed:
 * #undef of a name that is not currently defined is ignored.
 */
#undef BOUNDED_EBX
619