/**
 * \file pcm/pcm_dmix_x86_64.h
 * \ingroup PCM_Plugins
 * \brief PCM Direct Stream Mixing (dmix) Plugin Interface - X86-64 assembler code
 * \author Takashi Iwai <tiwai@suse.de>
 * \date 2003
 */
/*
 *  PCM - Direct Stream Mixing
 *  Copyright (c) 2003 by Jaroslav Kysela <perex@perex.cz>
 *                        Takashi Iwai <tiwai@suse.de>
 *
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as
 *   published by the Free Software Foundation; either version 2.1 of
 *   the License, or (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public
 *   License along with this library; if not, write to the Free Software
 *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */

/*
 * BOUNDED_RBX selects how the assembler routines below treat RBX:
 *  - defined:     RBX is saved to a local variable on entry and restored
 *                 on exit by the asm code itself, and is NOT listed as a
 *                 clobbered register;
 *  - not defined: RBX is simply listed in the clobber list.
 *
 * It is enabled for GCC releases older than 5 when building position
 * independent code.
 * NOTE(review): presumably those compilers refuse RBX in a clobber list
 * under PIC - confirm against the toolchain documentation.
 */
#if defined(__GNUC__) && __GNUC__ < 5 && defined(__PIC__)
#  define BOUNDED_RBX
#endif

34 /*
35 * MMX optimized
36 */
MIX_AREAS_16(unsigned int size,volatile signed short * dst,signed short * src,volatile signed int * sum,size_t dst_step,size_t src_step,size_t sum_step)37 static void MIX_AREAS_16(unsigned int size,
38 volatile signed short *dst, signed short *src,
39 volatile signed int *sum, size_t dst_step,
40 size_t src_step, size_t sum_step)
41 {
42 #ifdef BOUNDED_RBX
43 unsigned long long old_rbx;
44 #endif
45 /*
46 * RSI - src
47 * RDI - dst
48 * RBX - sum
49 * ECX - old sample
50 * EAX - sample / temporary
51 * EDX - temporary
52 */
53 __asm__ __volatile__ (
54 "\n"
55 #ifdef BOUNDED_RBX
56 "\tmovq %%rbx, %[old_rbx]\n"
57 #endif
58 /*
59 * initialization, load RSI, RDI, RBX registers
60 */
61 #ifndef _ILP32
62 "\tmovq %[dst], %%rdi\n"
63 "\tmovq %[src], %%rsi\n"
64 "\tmovq %[sum], %%rbx\n"
65 #else
66 "\tmovl %[dst], %%edi\n"
67 "\tmovl %[src], %%esi\n"
68 "\tmovl %[sum], %%ebx\n"
69 #endif
70
71 /*
72 * while (size-- > 0) {
73 */
74 "\tcmpl $0, %[size]\n"
75 "jz 6f\n"
76
77 "\t.p2align 4,,15\n"
78
79 "1:"
80
81 /*
82 * sample = *src;
83 * sum_sample = *sum;
84 * if (cmpxchg(*dst, 0, 1) == 0)
85 * sample -= sum_sample;
86 * xadd(*sum, sample);
87 */
88 "\tmovw $0, %%ax\n"
89 "\tmovw $1, %%cx\n"
90 "\tmovl (%%rbx), %%edx\n"
91 "\t" LOCK_PREFIX "cmpxchgw %%cx, (%%rdi)\n"
92 "\tmovswl (%%rsi), %%ecx\n"
93 "\tjnz 2f\n"
94 "\t" XSUB " %%edx, %%ecx\n"
95 "2:"
96 "\t" LOCK_PREFIX XADD " %%ecx, (%%rbx)\n"
97
98 /*
99 * do {
100 * sample = old_sample = *sum;
101 * saturate(v);
102 * *dst = sample;
103 * } while (v != *sum);
104 */
105
106 "3:"
107 "\tmovl (%%rbx), %%ecx\n"
108 "\tmovd %%ecx, %%mm0\n"
109 "\tpackssdw %%mm1, %%mm0\n"
110 "\tmovd %%mm0, %%eax\n"
111 "\tmovw %%ax, (%%rdi)\n"
112 "\tcmpl %%ecx, (%%rbx)\n"
113 "\tjnz 3b\n"
114
115 /*
116 * while (size-- > 0)
117 */
118 #ifndef _ILP32
119 "\taddq %[dst_step], %%rdi\n"
120 "\taddq %[src_step], %%rsi\n"
121 "\taddq %[sum_step], %%rbx\n"
122 #else
123 "\taddl %[dst_step], %%edi\n"
124 "\taddl %[src_step], %%esi\n"
125 "\taddl %[sum_step], %%ebx\n"
126 #endif
127 "\tdecl %[size]\n"
128 "\tjnz 1b\n"
129
130 "6:"
131
132 "\temms\n"
133 #ifdef BOUNDED_RBX
134 "\tmovq %[old_rbx], %%rbx\n"
135 #endif
136 : [size] "+&rm" (size)
137 #ifdef BOUNDED_RBX
138 , [old_rbx] "=m" (old_rbx)
139 #endif
140 : [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
141 [dst_step] "im" (dst_step), [src_step] "im" (src_step),
142 [sum_step] "im" (sum_step)
143 : "rsi", "rdi", "edx", "ecx", "eax", "memory", "cc"
144 #ifndef BOUNDED_RBX
145 , "rbx"
146 #endif
147 #ifdef HAVE_MMX
148 , "mm0"
149 #else
150 , "st", "st(1)", "st(2)", "st(3)",
151 "st(4)", "st(5)", "st(6)", "st(7)"
152 #endif
153 );
154 }
155
156 /*
157 * 32-bit version (24-bit resolution)
158 */
MIX_AREAS_32(unsigned int size,volatile signed int * dst,signed int * src,volatile signed int * sum,size_t dst_step,size_t src_step,size_t sum_step)159 static void MIX_AREAS_32(unsigned int size,
160 volatile signed int *dst, signed int *src,
161 volatile signed int *sum, size_t dst_step,
162 size_t src_step, size_t sum_step)
163 {
164 #ifdef BOUNDED_RBX
165 unsigned long long old_rbx;
166 #endif
167 /*
168 * RSI - src
169 * RDI - dst
170 * RBX - sum
171 * ECX - old sample
172 * EAX - sample / temporary
173 * EDX - temporary
174 */
175 __asm__ __volatile__ (
176 "\n"
177 #ifdef BOUNDED_RBX
178 "\tmovq %%rbx, %[old_rbx]\n"
179 #endif
180 /*
181 * initialization, load RSI, RDI, RBX registers
182 */
183 #ifndef _ILP32
184 "\tmovq %[dst], %%rdi\n"
185 "\tmovq %[src], %%rsi\n"
186 "\tmovq %[sum], %%rbx\n"
187 #else
188 "\tmovl %[dst], %%edi\n"
189 "\tmovl %[src], %%esi\n"
190 "\tmovl %[sum], %%ebx\n"
191 #endif
192
193 /*
194 * while (size-- > 0) {
195 */
196 "\tcmpl $0, %[size]\n"
197 "jz 6f\n"
198
199 "\t.p2align 4,,15\n"
200
201 "1:"
202
203 /*
204 * sample = *src;
205 * sum_sample = *sum;
206 * if (cmpxchg(*dst, 0, 1) == 0)
207 * sample -= sum_sample;
208 * xadd(*sum, sample);
209 */
210 "\tmovl $0, %%eax\n"
211 "\tmovl $1, %%ecx\n"
212 "\tmovl (%%rbx), %%edx\n"
213 "\t" LOCK_PREFIX "cmpxchgl %%ecx, (%%rdi)\n"
214 "\tjnz 2f\n"
215 "\tmovl (%%rsi), %%ecx\n"
216 /* sample >>= 8 */
217 "\tsarl $8, %%ecx\n"
218 "\t" XSUB " %%edx, %%ecx\n"
219 "\tjmp 21f\n"
220 "2:"
221 "\tmovl (%%rsi), %%ecx\n"
222 /* sample >>= 8 */
223 "\tsarl $8, %%ecx\n"
224 "21:"
225 "\t" LOCK_PREFIX XADD " %%ecx, (%%rbx)\n"
226
227 /*
228 * do {
229 * sample = old_sample = *sum;
230 * saturate(v);
231 * *dst = sample;
232 * } while (v != *sum);
233 */
234
235 "3:"
236 "\tmovl (%%rbx), %%ecx\n"
237 /*
238 * if (sample > 0x7fff00)
239 */
240 "\tmovl $0x7fffff, %%eax\n"
241 "\tcmpl %%eax, %%ecx\n"
242 "\tjg 4f\n"
243 /*
244 * if (sample < -0x800000)
245 */
246 "\tmovl $-0x800000, %%eax\n"
247 "\tcmpl %%eax, %%ecx\n"
248 "\tjl 4f\n"
249 "\tmovl %%ecx, %%eax\n"
250 "4:"
251 /*
252 * sample <<= 8;
253 */
254 "\tsall $8, %%eax\n"
255 "\tmovl %%eax, (%%rdi)\n"
256 "\tcmpl %%ecx, (%%rbx)\n"
257 "\tjnz 3b\n"
258
259 /*
260 * while (size-- > 0)
261 */
262 #ifndef _ILP32
263 "\taddq %[dst_step], %%rdi\n"
264 "\taddq %[src_step], %%rsi\n"
265 "\taddq %[sum_step], %%rbx\n"
266 #else
267 "\taddl %[dst_step], %%edi\n"
268 "\taddl %[src_step], %%esi\n"
269 "\taddl %[sum_step], %%ebx\n"
270 #endif
271 "\tdecl %[size]\n"
272 "\tjnz 1b\n"
273
274 "6:"
275 #ifdef BOUNDED_RBX
276 "\tmovq %[old_rbx], %%rbx\n"
277 #endif
278 : [size] "+&rm" (size)
279 #ifdef BOUNDED_RBX
280 , [old_rbx] "=m" (old_rbx)
281 #endif
282 : [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
283 [dst_step] "im" (dst_step), [src_step] "im" (src_step),
284 [sum_step] "im" (sum_step)
285 : "rsi", "rdi", "edx", "ecx", "eax", "memory", "cc"
286 #ifndef BOUNDED_RBX
287 , "rbx"
288 #endif
289 );
290 }
291
292 /*
293 * 24-bit version
294 */
MIX_AREAS_24(unsigned int size,volatile unsigned char * dst,unsigned char * src,volatile signed int * sum,size_t dst_step,size_t src_step,size_t sum_step)295 static void MIX_AREAS_24(unsigned int size,
296 volatile unsigned char *dst, unsigned char *src,
297 volatile signed int *sum, size_t dst_step,
298 size_t src_step, size_t sum_step)
299 {
300 #ifdef BOUNDED_RBX
301 unsigned long long old_rbx;
302 #endif
303 /*
304 * RSI - src
305 * RDI - dst
306 * RBX - sum
307 * ECX - old sample
308 * EAX - sample / temporary
309 * EDX - temporary
310 */
311 __asm__ __volatile__ (
312 "\n"
313 #ifdef BOUNDED_RBX
314 "\tmovq %%rbx, %[old_rbx]\n"
315 #endif
316 /*
317 * initialization, load RSI, RDI, RBX registers
318 */
319 #ifndef _ILP32
320 "\tmovq %[dst], %%rdi\n"
321 "\tmovq %[src], %%rsi\n"
322 "\tmovq %[sum], %%rbx\n"
323 #else
324 "\tmovl %[dst], %%edi\n"
325 "\tmovl %[src], %%esi\n"
326 "\tmovl %[sum], %%ebx\n"
327 #endif
328
329 /*
330 * while (size-- > 0) {
331 */
332 "\tcmpl $0, %[size]\n"
333 "jz 6f\n"
334
335 "\t.p2align 4,,15\n"
336
337 "1:"
338
339 /*
340 * sample = *src;
341 * sum_sample = *sum;
342 * if (test_and_set_bit(0, dst) == 0)
343 * sample -= sum_sample;
344 * *sum += sample;
345 */
346 "\tmovsbl 2(%%rsi), %%eax\n"
347 "\tmovzwl (%%rsi), %%ecx\n"
348 "\tmovl (%%rbx), %%edx\n"
349 "\tsall $16, %%eax\n"
350 "\torl %%eax, %%ecx\n"
351 "\t" LOCK_PREFIX "btsw $0, (%%rdi)\n"
352 "\tjc 2f\n"
353 "\t" XSUB " %%edx, %%ecx\n"
354 "2:"
355 "\t" LOCK_PREFIX XADD " %%ecx, (%%rbx)\n"
356
357 /*
358 * do {
359 * sample = old_sample = *sum;
360 * saturate(sample);
361 * *dst = sample | 1;
362 * } while (old_sample != *sum);
363 */
364
365 "3:"
366 "\tmovl (%%rbx), %%ecx\n"
367
368 "\tmovl $0x7fffff, %%eax\n"
369 "\tmovl $-0x7fffff, %%edx\n"
370 "\tcmpl %%eax, %%ecx\n"
371 "\tcmovng %%ecx, %%eax\n"
372 "\tcmpl %%edx, %%ecx\n"
373 "\tcmovl %%edx, %%eax\n"
374
375 "\torl $1, %%eax\n"
376 "\tmovw %%ax, (%%rdi)\n"
377 "\tshrl $16, %%eax\n"
378 "\tmovb %%al, 2(%%rdi)\n"
379
380 "\tcmpl %%ecx, (%%rbx)\n"
381 "\tjnz 3b\n"
382
383 /*
384 * while (size-- > 0)
385 */
386 #ifndef _ILP32
387 "\taddq %[dst_step], %%rdi\n"
388 "\taddq %[src_step], %%rsi\n"
389 "\taddq %[sum_step], %%rbx\n"
390 #else
391 "\taddl %[dst_step], %%edi\n"
392 "\taddl %[src_step], %%esi\n"
393 "\taddl %[sum_step], %%ebx\n"
394 #endif
395 "\tdecl %[size]\n"
396 "\tjnz 1b\n"
397
398 "6:"
399 #ifdef BOUNDED_RBX
400 "\tmovq %[old_rbx], %%rbx\n"
401 #endif
402 : [size] "+&rm" (size)
403 #ifdef BOUNDED_RBX
404 , [old_rbx] "=m" (old_rbx)
405 #endif
406 : [dst] "m" (dst), [src] "m" (src), [sum] "m" (sum),
407 [dst_step] "im" (dst_step), [src_step] "im" (src_step),
408 [sum_step] "im" (sum_step)
409 : "rsi", "rdi", "edx", "ecx", "eax", "memory", "cc"
410 #ifndef BOUNDED_RBX
411 , "rbx"
412 #endif
413 );
414 }
415
/*
 * Drop the helper macro at the end of the header.  #undef on a name
 * that is not currently defined is ignored, so no #ifdef guard is needed.
 */
#undef BOUNDED_RBX
