/* -----------------------------------------------------------------------
   sysv.S - Copyright (c) 2017  Anthony Green
          - Copyright (c) 2013  The Written Word, Inc.
          - Copyright (c) 1996,1998,2001-2003,2005,2008,2010  Red Hat, Inc.

   X86 Foreign Function Interface

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   ``Software''), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
   DEALINGS IN THE SOFTWARE.
   ----------------------------------------------------------------------- */

#ifdef __i386__
#ifndef _MSC_VER

#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
#include "internal.h"

/* Token pasting with argument expansion: C1 expands its arguments
   (e.g. __USER_LABEL_PREFIX__) before C2 concatenates them.  */
#define C2(X, Y)  X ## Y
#define C1(X, Y)  C2(X, Y)
/* C(X): apply the platform's user-label prefix (e.g. "_" on Darwin).  */
#ifdef __USER_LABEL_PREFIX__
# define C(X)     C1(__USER_LABEL_PREFIX__, X)
#else
# define C(X)     X
#endif

/* L(X): assembler-local label; Darwin uses a plain "L" prefix, others ".L".  */
#ifdef X86_DARWIN
# define L(X)     C1(L, X)
#else
# define L(X)     C1(.L, X)
#endif

/* ENDF(X): emit ELF symbol type/size info; no-op elsewhere.  */
#ifdef __ELF__
# define ENDF(X)  .type	X,@function; .size X, . - X
#else
# define ENDF(X)
#endif

/* Handle win32 fastcall name mangling.  */
#ifdef X86_WIN32
# define ffi_call_i386		@ffi_call_i386@8
# define ffi_closure_inner	@ffi_closure_inner@8
#else
# define ffi_call_i386		C(ffi_call_i386)
# define ffi_closure_inner	C(ffi_closure_inner)
#endif

/* This macro allows the safe creation of jump tables without an
   actual table.  The entry points into the table are all 8 bytes.
   The use of ORG asserts that we're at the correct location.  */
/* ??? The clang assembler doesn't handle .org with symbolic expressions.  */
#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
# define E(BASE, X)	.balign 8
#else
# define E(BASE, X)	.balign 8; .org BASE + X * 8
#endif

76	.text
77	.balign	16
78	.globl	ffi_call_i386
79	FFI_HIDDEN(ffi_call_i386)
80
81/* This is declared as
82
83   void ffi_call_i386(struct call_frame *frame, char *argp)
84        __attribute__((fastcall));
85
86   Thus the arguments are present in
87
88        ecx: frame
89        edx: argp
90*/
91
92ffi_call_i386:
93L(UW0):
94	# cfi_startproc
95#if !HAVE_FASTCALL
96	movl	4(%esp), %ecx
97	movl	8(%esp), %edx
98#endif
99	movl	(%esp), %eax		/* move the return address */
100	movl	%ebp, (%ecx)		/* store %ebp into local frame */
101	movl	%eax, 4(%ecx)		/* store retaddr into local frame */
102
103	/* New stack frame based off ebp.  This is a itty bit of unwind
104	   trickery in that the CFA *has* changed.  There is no easy way
105	   to describe it correctly on entry to the function.  Fortunately,
106	   it doesn't matter too much since at all points we can correctly
107	   unwind back to ffi_call.  Note that the location to which we
108	   moved the return address is (the new) CFA-4, so from the
109	   perspective of the unwind info, it hasn't moved.  */
110	movl	%ecx, %ebp
111L(UW1):
112	# cfi_def_cfa(%ebp, 8)
113	# cfi_rel_offset(%ebp, 0)
114
115	movl	%edx, %esp		/* set outgoing argument stack */
116	movl	20+R_EAX*4(%ebp), %eax	/* set register arguments */
117	movl	20+R_EDX*4(%ebp), %edx
118	movl	20+R_ECX*4(%ebp), %ecx
119
120	call	*8(%ebp)
121
122	movl	12(%ebp), %ecx		/* load return type code */
123	movl	%ebx, 8(%ebp)		/* preserve %ebx */
124L(UW2):
125	# cfi_rel_offset(%ebx, 8)
126
127	andl	$X86_RET_TYPE_MASK, %ecx
128#ifdef __PIC__
129	call	C(__x86.get_pc_thunk.bx)
130L(pc1):
131	leal	L(store_table)-L(pc1)(%ebx, %ecx, 8), %ebx
132#else
133	leal	L(store_table)(,%ecx, 8), %ebx
134#endif
135	movl	16(%ebp), %ecx		/* load result address */
136	jmp	*%ebx
137
138	.balign	8
139L(store_table):
140E(L(store_table), X86_RET_FLOAT)
141	fstps	(%ecx)
142	jmp	L(e1)
143E(L(store_table), X86_RET_DOUBLE)
144	fstpl	(%ecx)
145	jmp	L(e1)
146E(L(store_table), X86_RET_LDOUBLE)
147	fstpt	(%ecx)
148	jmp	L(e1)
149E(L(store_table), X86_RET_SINT8)
150	movsbl	%al, %eax
151	mov	%eax, (%ecx)
152	jmp	L(e1)
153E(L(store_table), X86_RET_SINT16)
154	movswl	%ax, %eax
155	mov	%eax, (%ecx)
156	jmp	L(e1)
157E(L(store_table), X86_RET_UINT8)
158	movzbl	%al, %eax
159	mov	%eax, (%ecx)
160	jmp	L(e1)
161E(L(store_table), X86_RET_UINT16)
162	movzwl	%ax, %eax
163	mov	%eax, (%ecx)
164	jmp	L(e1)
165E(L(store_table), X86_RET_INT64)
166	movl	%edx, 4(%ecx)
167	/* fallthru */
168E(L(store_table), X86_RET_INT32)
169	movl	%eax, (%ecx)
170	/* fallthru */
171E(L(store_table), X86_RET_VOID)
172L(e1):
173	movl	8(%ebp), %ebx
174	movl	%ebp, %esp
175	popl	%ebp
176L(UW3):
177	# cfi_remember_state
178	# cfi_def_cfa(%esp, 4)
179	# cfi_restore(%ebx)
180	# cfi_restore(%ebp)
181	ret
182L(UW4):
183	# cfi_restore_state
184
185E(L(store_table), X86_RET_STRUCTPOP)
186	jmp	L(e1)
187E(L(store_table), X86_RET_STRUCTARG)
188	jmp	L(e1)
189E(L(store_table), X86_RET_STRUCT_1B)
190	movb	%al, (%ecx)
191	jmp	L(e1)
192E(L(store_table), X86_RET_STRUCT_2B)
193	movw	%ax, (%ecx)
194	jmp	L(e1)
195
196	/* Fill out the table so that bad values are predictable.  */
197E(L(store_table), X86_RET_UNUSED14)
198	ud2
199E(L(store_table), X86_RET_UNUSED15)
200	ud2
201
202L(UW5):
203	# cfi_endproc
204ENDF(ffi_call_i386)
205
/* The inner helper is declared as

   void ffi_closure_inner(struct closure_frame *frame, char *argp)
	__attribute__((fastcall))

   Thus the arguments are placed in

	ecx:	frame
	edx:	argp
*/

/* Macros to help setting up the closure_data structure.  */

/* Frame size for the closure entry points.  Without fastcall, 8 bytes
   are reserved at the bottom for the two stack arguments to
   ffi_closure_inner, and closure_CF offsets the closure_data past them.
   Both sizes keep (frame + return address) a multiple of 16.  */
#if HAVE_FASTCALL
# define closure_FS	(40 + 4)
# define closure_CF	0
#else
# define closure_FS	(8 + 40 + 12)
# define closure_CF	8
#endif

/* Save the incoming argument registers into closure_data.  */
#define FFI_CLOSURE_SAVE_REGS		\
	movl	%eax, closure_CF+16+R_EAX*4(%esp);	\
	movl	%edx, closure_CF+16+R_EDX*4(%esp);	\
	movl	%ecx, closure_CF+16+R_ECX*4(%esp)

/* Copy cif, fun, user_data from the ffi_closure (%eax points at it;
   the fields follow the trampoline) into closure_data.  */
#define FFI_CLOSURE_COPY_TRAMP_DATA					\
	movl	FFI_TRAMPOLINE_SIZE(%eax), %edx;	/* copy cif */	\
	movl	FFI_TRAMPOLINE_SIZE+4(%eax), %ecx;	/* copy fun */	\
	movl	FFI_TRAMPOLINE_SIZE+8(%eax), %eax;	/* copy user_data */ \
	movl	%edx, closure_CF+28(%esp);				\
	movl	%ecx, closure_CF+32(%esp);				\
	movl	%eax, closure_CF+36(%esp)

/* Set up the two arguments for ffi_closure_inner: the closure_data
   pointer and the incoming argument stack (just above our frame and
   return address).  Without fastcall they are stored as stack args.  */
#if HAVE_FASTCALL
# define FFI_CLOSURE_PREP_CALL						\
	movl	%esp, %ecx;			/* load closure_data */	\
	leal	closure_FS+4(%esp), %edx;	/* load incoming stack */
#else
# define FFI_CLOSURE_PREP_CALL						\
	leal	closure_CF(%esp), %ecx;		/* load closure_data */	\
	leal	closure_FS+4(%esp), %edx;	/* load incoming stack */ \
	movl	%ecx, (%esp);						\
	movl	%edx, 4(%esp)
#endif

#define FFI_CLOSURE_CALL_INNER(UWN) \
	call	ffi_closure_inner

/* Mask the X86_RET_* code out of ffi_closure_inner's return value and
   jump into 8-byte-per-entry jump table N; %eax is optimistically
   pre-loaded with the integer return value.  */
#define FFI_CLOSURE_MASK_AND_JUMP(N, UW)				\
	andl	$X86_RET_TYPE_MASK, %eax;				\
	leal	L(C1(load_table,N))(, %eax, 8), %edx;			\
	movl	closure_CF(%esp), %eax;		/* optimistic load */	\
	jmp	*%edx

#ifdef __PIC__
# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
/* PIC without a GOT-relative call: locate the table PC-relatively.  */
#  undef FFI_CLOSURE_MASK_AND_JUMP
#  define FFI_CLOSURE_MASK_AND_JUMP(N, UW)				\
	andl	$X86_RET_TYPE_MASK, %eax;				\
	call	C(__x86.get_pc_thunk.dx);				\
L(C1(pc,N)):								\
	leal	L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %edx;	\
	movl	closure_CF(%esp), %eax;		/* optimistic load */	\
	jmp	*%edx
# else
/* Full PIC: the call to ffi_closure_inner goes through the PLT, which
   requires %ebx to hold the GOT pointer; save/restore it at 40(%esp).  */
#  define FFI_CLOSURE_CALL_INNER_SAVE_EBX
#  undef FFI_CLOSURE_CALL_INNER
#  define FFI_CLOSURE_CALL_INNER(UWN)					\
	movl	%ebx, 40(%esp);			/* save ebx */		\
L(C1(UW,UWN)):								\
	/* cfi_rel_offset(%ebx, 40); */					\
	call	C(__x86.get_pc_thunk.bx);	/* load got register */	\
	addl	$C(_GLOBAL_OFFSET_TABLE_), %ebx;			\
	call	ffi_closure_inner@PLT
#  undef FFI_CLOSURE_MASK_AND_JUMP
#  define FFI_CLOSURE_MASK_AND_JUMP(N, UWN)				\
	andl	$X86_RET_TYPE_MASK, %eax;				\
	leal	L(C1(load_table,N))@GOTOFF(%ebx, %eax, 8), %edx;	\
	movl	40(%esp), %ebx;			/* restore ebx */	\
L(C1(UW,UWN)):								\
	/* cfi_restore(%ebx); */					\
	movl	closure_CF(%esp), %eax;		/* optimistic load */	\
	jmp	*%edx
# endif /* DARWIN || HIDDEN */
#endif /* __PIC__ */

/* Go closure entry: the ffi_go_closure arrives in %eax; its cif and
   fun fields are at offsets 4 and 8, and the closure itself serves as
   user_data.  Tail-jumps into the common i386 closure path.  */
	.balign	16
	.globl	C(ffi_go_closure_EAX)
	FFI_HIDDEN(C(ffi_go_closure_EAX))
C(ffi_go_closure_EAX):
L(UW6):
	# cfi_startproc
	subl	$closure_FS, %esp
L(UW7):
	# cfi_def_cfa_offset(closure_FS + 4)
	FFI_CLOSURE_SAVE_REGS
	movl	4(%eax), %edx			/* copy cif */
	movl	8(%eax), %ecx			/* copy fun */
	movl	%edx, closure_CF+28(%esp)
	movl	%ecx, closure_CF+32(%esp)
	movl	%eax, closure_CF+36(%esp)	/* closure is user_data */
	jmp	L(do_closure_i386)
L(UW8):
	# cfi_endproc
ENDF(C(ffi_go_closure_EAX))

/* Same as ffi_go_closure_EAX, but the ffi_go_closure arrives in %ecx.  */
	.balign	16
	.globl	C(ffi_go_closure_ECX)
	FFI_HIDDEN(C(ffi_go_closure_ECX))
C(ffi_go_closure_ECX):
L(UW9):
	# cfi_startproc
	subl	$closure_FS, %esp
L(UW10):
	# cfi_def_cfa_offset(closure_FS + 4)
	FFI_CLOSURE_SAVE_REGS
	movl	4(%ecx), %edx			/* copy cif */
	movl	8(%ecx), %eax			/* copy fun */
	movl	%edx, closure_CF+28(%esp)
	movl	%eax, closure_CF+32(%esp)
	movl	%ecx, closure_CF+36(%esp)	/* closure is user_data */
	jmp	L(do_closure_i386)
L(UW11):
	# cfi_endproc
ENDF(C(ffi_go_closure_ECX))

/* The closure entry points are reached from the ffi_closure trampoline.
   On entry, %eax contains the address of the ffi_closure.  */

	.balign	16
	.globl	C(ffi_closure_i386)
	FFI_HIDDEN(C(ffi_closure_i386))

C(ffi_closure_i386):
L(UW12):
	# cfi_startproc
	subl	$closure_FS, %esp
L(UW13):
	# cfi_def_cfa_offset(closure_FS + 4)

	FFI_CLOSURE_SAVE_REGS
	FFI_CLOSURE_COPY_TRAMP_DATA

	/* Entry point from preceding Go closures.  */
L(do_closure_i386):

	FFI_CLOSURE_PREP_CALL
	FFI_CLOSURE_CALL_INNER(14)
	FFI_CLOSURE_MASK_AND_JUMP(2, 15)

	/* Load the return value from closure_data into the registers
	   the caller expects; entries indexed by X86_RET_* code.  */
	.balign	8
L(load_table2):
E(L(load_table2), X86_RET_FLOAT)
	flds	closure_CF(%esp)
	jmp	L(e2)
E(L(load_table2), X86_RET_DOUBLE)
	fldl	closure_CF(%esp)
	jmp	L(e2)
E(L(load_table2), X86_RET_LDOUBLE)
	fldt	closure_CF(%esp)
	jmp	L(e2)
E(L(load_table2), X86_RET_SINT8)
	movsbl	%al, %eax
	jmp	L(e2)
E(L(load_table2), X86_RET_SINT16)
	movswl	%ax, %eax
	jmp	L(e2)
E(L(load_table2), X86_RET_UINT8)
	movzbl	%al, %eax
	jmp	L(e2)
E(L(load_table2), X86_RET_UINT16)
	movzwl	%ax, %eax
	jmp	L(e2)
E(L(load_table2), X86_RET_INT64)
	movl	closure_CF+4(%esp), %edx
	jmp	L(e2)
E(L(load_table2), X86_RET_INT32)
	nop
	/* fallthru */
E(L(load_table2), X86_RET_VOID)
L(e2):
	addl	$closure_FS, %esp
L(UW16):
	# cfi_adjust_cfa_offset(-closure_FS)
	ret
L(UW17):
	# cfi_adjust_cfa_offset(closure_FS)
E(L(load_table2), X86_RET_STRUCTPOP)
	addl	$closure_FS, %esp
L(UW18):
	# cfi_adjust_cfa_offset(-closure_FS)
	ret	$4
L(UW19):
	# cfi_adjust_cfa_offset(closure_FS)
E(L(load_table2), X86_RET_STRUCTARG)
	jmp	L(e2)
E(L(load_table2), X86_RET_STRUCT_1B)
	movzbl	%al, %eax
	jmp	L(e2)
E(L(load_table2), X86_RET_STRUCT_2B)
	movzwl	%ax, %eax
	jmp	L(e2)

	/* Fill out the table so that bad values are predictable.  */
E(L(load_table2), X86_RET_UNUSED14)
	ud2
E(L(load_table2), X86_RET_UNUSED15)
	ud2

L(UW20):
	# cfi_endproc
ENDF(C(ffi_closure_i386))

/* Go closure entry for STDCALL-style calls: the ffi_go_closure arrives
   in %ecx; joins the STDCALL closure path below.  */
	.balign	16
	.globl	C(ffi_go_closure_STDCALL)
	FFI_HIDDEN(C(ffi_go_closure_STDCALL))
C(ffi_go_closure_STDCALL):
L(UW21):
	# cfi_startproc
	subl	$closure_FS, %esp
L(UW22):
	# cfi_def_cfa_offset(closure_FS + 4)
	FFI_CLOSURE_SAVE_REGS
	movl	4(%ecx), %edx			/* copy cif */
	movl	8(%ecx), %eax			/* copy fun */
	movl	%edx, closure_CF+28(%esp)
	movl	%eax, closure_CF+32(%esp)
	movl	%ecx, closure_CF+36(%esp)	/* closure is user_data */
	jmp	L(do_closure_STDCALL)
L(UW23):
	# cfi_endproc
ENDF(C(ffi_go_closure_STDCALL))

/* For REGISTER, we have no available parameter registers, and so we
   enter here having pushed the closure onto the stack.  The frame is
   grown by closure_FS-4 (the closure push already supplied 4 bytes),
   then retaddr and closure are swapped so the stack matches the other
   closure entry points.  */

	.balign	16
	.globl	C(ffi_closure_REGISTER)
	FFI_HIDDEN(C(ffi_closure_REGISTER))
C(ffi_closure_REGISTER):
L(UW24):
	# cfi_startproc
	# cfi_def_cfa(%esp, 8)
	# cfi_offset(%eip, -8)
	subl	$closure_FS-4, %esp
L(UW25):
	# cfi_def_cfa_offset(closure_FS + 4)
	FFI_CLOSURE_SAVE_REGS
	movl	closure_FS-4(%esp), %ecx	/* load retaddr */
	movl	closure_FS(%esp), %eax		/* load closure */
	movl	%ecx, closure_FS(%esp)		/* move retaddr */
	jmp	L(do_closure_REGISTER)
L(UW26):
	# cfi_endproc
ENDF(C(ffi_closure_REGISTER))

/* For STDCALL (and others), we need to pop N bytes of arguments off
   the stack following the closure.  The amount needing to be popped
   is returned to us from ffi_closure_inner.  */

	.balign	16
	.globl	C(ffi_closure_STDCALL)
	FFI_HIDDEN(C(ffi_closure_STDCALL))
C(ffi_closure_STDCALL):
L(UW27):
	# cfi_startproc
	subl	$closure_FS, %esp
L(UW28):
	# cfi_def_cfa_offset(closure_FS + 4)

	FFI_CLOSURE_SAVE_REGS

	/* Entry point from ffi_closure_REGISTER.  */
L(do_closure_REGISTER):

	FFI_CLOSURE_COPY_TRAMP_DATA

	/* Entry point from preceding Go closure.  */
L(do_closure_STDCALL):

	FFI_CLOSURE_PREP_CALL
	FFI_CLOSURE_CALL_INNER(29)

	movl	%eax, %ecx
	shrl	$X86_RET_POP_SHIFT, %ecx	/* isolate pop count */
	leal	closure_FS(%esp, %ecx), %ecx	/* compute popped esp */
	movl	closure_FS(%esp), %edx		/* move return address */
	movl	%edx, (%ecx)

	/* From this point on, the value of %esp upon return is %ecx+4,
	   and we've copied the return address to %ecx to make return easy.
	   There's no point in representing this in the unwind info, as
	   there is always a window between the mov and the ret which
	   will be wrong from one point of view or another.  */

	FFI_CLOSURE_MASK_AND_JUMP(3, 30)

	.balign	8
L(load_table3):
E(L(load_table3), X86_RET_FLOAT)
	flds    closure_CF(%esp)
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_DOUBLE)
	fldl    closure_CF(%esp)
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_LDOUBLE)
	fldt    closure_CF(%esp)
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_SINT8)
	movsbl  %al, %eax
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_SINT16)
	movswl  %ax, %eax
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_UINT8)
	movzbl  %al, %eax
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_UINT16)
	movzwl  %ax, %eax
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_INT64)
	movl	closure_CF+4(%esp), %edx
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_INT32)
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_VOID)
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_STRUCTPOP)
	movl    %ecx, %esp
	ret
E(L(load_table3), X86_RET_STRUCTARG)
	movl	%ecx, %esp
	ret
E(L(load_table3), X86_RET_STRUCT_1B)
	movzbl	%al, %eax
	movl	%ecx, %esp
	ret
E(L(load_table3), X86_RET_STRUCT_2B)
	movzwl	%ax, %eax
	movl	%ecx, %esp
	ret

	/* Fill out the table so that bad values are predictable.  */
E(L(load_table3), X86_RET_UNUSED14)
	ud2
E(L(load_table3), X86_RET_UNUSED15)
	ud2

L(UW31):
	# cfi_endproc
ENDF(C(ffi_closure_STDCALL))

#if !FFI_NO_RAW_API

/* Frame: 16 bytes of outgoing arguments, 16 bytes of return value
   space at 16(%esp), and 12 more with the saved %ebx at the top.  */
#define raw_closure_S_FS	(16+16+12)

	.balign	16
	.globl	C(ffi_closure_raw_SYSV)
	FFI_HIDDEN(C(ffi_closure_raw_SYSV))
C(ffi_closure_raw_SYSV):
L(UW32):
	# cfi_startproc
	subl	$raw_closure_S_FS, %esp
L(UW33):
	# cfi_def_cfa_offset(raw_closure_S_FS + 4)
	movl	%ebx, raw_closure_S_FS-4(%esp)
L(UW34):
	# cfi_rel_offset(%ebx, raw_closure_S_FS-4)

	/* Build the argument list for cl->fun(cif, &res, raw_args, user_data).  */
	movl	FFI_TRAMPOLINE_SIZE+8(%eax), %edx	/* load cl->user_data */
	movl	%edx, 12(%esp)
	leal	raw_closure_S_FS+4(%esp), %edx		/* load raw_args */
	movl	%edx, 8(%esp)
	leal	16(%esp), %edx				/* load &res */
	movl	%edx, 4(%esp)
	movl	FFI_TRAMPOLINE_SIZE(%eax), %ebx		/* load cl->cif */
	movl	%ebx, (%esp)
	call	*FFI_TRAMPOLINE_SIZE+4(%eax)		/* call cl->fun */

	movl	20(%ebx), %eax				/* load cif->flags */
	andl	$X86_RET_TYPE_MASK, %eax
#ifdef __PIC__
	call	C(__x86.get_pc_thunk.bx)
L(pc4):
	leal	L(load_table4)-L(pc4)(%ebx, %eax, 8), %ecx
#else
	leal	L(load_table4)(,%eax, 8), %ecx
#endif
	movl	raw_closure_S_FS-4(%esp), %ebx		/* restore %ebx */
L(UW35):
	# cfi_restore(%ebx)
	movl	16(%esp), %eax				/* Optimistic load */
	jmp	*%ecx

	.balign	8
L(load_table4):
E(L(load_table4), X86_RET_FLOAT)
	flds	16(%esp)
	jmp	L(e4)
E(L(load_table4), X86_RET_DOUBLE)
	fldl	16(%esp)
	jmp	L(e4)
E(L(load_table4), X86_RET_LDOUBLE)
	fldt	16(%esp)
	jmp	L(e4)
E(L(load_table4), X86_RET_SINT8)
	movsbl	%al, %eax
	jmp	L(e4)
E(L(load_table4), X86_RET_SINT16)
	movswl	%ax, %eax
	jmp	L(e4)
E(L(load_table4), X86_RET_UINT8)
	movzbl	%al, %eax
	jmp	L(e4)
E(L(load_table4), X86_RET_UINT16)
	movzwl	%ax, %eax
	jmp	L(e4)
E(L(load_table4), X86_RET_INT64)
	movl	16+4(%esp), %edx
	jmp	L(e4)
E(L(load_table4), X86_RET_INT32)
	nop
	/* fallthru */
E(L(load_table4), X86_RET_VOID)
L(e4):
	addl	$raw_closure_S_FS, %esp
L(UW36):
	# cfi_adjust_cfa_offset(-raw_closure_S_FS)
	ret
L(UW37):
	# cfi_adjust_cfa_offset(raw_closure_S_FS)
E(L(load_table4), X86_RET_STRUCTPOP)
	addl	$raw_closure_S_FS, %esp
L(UW38):
	# cfi_adjust_cfa_offset(-raw_closure_S_FS)
	ret	$4
L(UW39):
	# cfi_adjust_cfa_offset(raw_closure_S_FS)
E(L(load_table4), X86_RET_STRUCTARG)
	jmp	L(e4)
E(L(load_table4), X86_RET_STRUCT_1B)
	movzbl	%al, %eax
	jmp	L(e4)
E(L(load_table4), X86_RET_STRUCT_2B)
	movzwl	%ax, %eax
	jmp	L(e4)

	/* Fill out the table so that bad values are predictable.  */
E(L(load_table4), X86_RET_UNUSED14)
	ud2
E(L(load_table4), X86_RET_UNUSED15)
	ud2

L(UW40):
	# cfi_endproc
ENDF(C(ffi_closure_raw_SYSV))

#define raw_closure_T_FS	(16+16+8)

	.balign	16
	.globl	C(ffi_closure_raw_THISCALL)
	FFI_HIDDEN(C(ffi_closure_raw_THISCALL))
C(ffi_closure_raw_THISCALL):
L(UW41):
	# cfi_startproc
	/* Rearrange the stack such that %ecx is the first argument.
	   This means moving the return address.  */
	popl	%edx
L(UW42):
	# cfi_def_cfa_offset(0)
	# cfi_register(%eip, %edx)
	pushl	%ecx
L(UW43):
	# cfi_adjust_cfa_offset(4)
	pushl	%edx
L(UW44):
	# cfi_adjust_cfa_offset(4)
	# cfi_rel_offset(%eip, 0)
	subl	$raw_closure_T_FS, %esp
L(UW45):
	# cfi_adjust_cfa_offset(raw_closure_T_FS)
	movl	%ebx, raw_closure_T_FS-4(%esp)
L(UW46):
	# cfi_rel_offset(%ebx, raw_closure_T_FS-4)

	/* Build the argument list for cl->fun(cif, &res, raw_args, user_data).  */
	movl	FFI_TRAMPOLINE_SIZE+8(%eax), %edx	/* load cl->user_data */
	movl	%edx, 12(%esp)
	leal	raw_closure_T_FS+4(%esp), %edx		/* load raw_args */
	movl	%edx, 8(%esp)
	leal	16(%esp), %edx				/* load &res */
	movl	%edx, 4(%esp)
	movl	FFI_TRAMPOLINE_SIZE(%eax), %ebx		/* load cl->cif */
	movl	%ebx, (%esp)
	call	*FFI_TRAMPOLINE_SIZE+4(%eax)		/* call cl->fun */

	movl	20(%ebx), %eax				/* load cif->flags */
	andl	$X86_RET_TYPE_MASK, %eax
#ifdef __PIC__
	call	C(__x86.get_pc_thunk.bx)
L(pc5):
	leal	L(load_table5)-L(pc5)(%ebx, %eax, 8), %ecx
#else
	leal	L(load_table5)(,%eax, 8), %ecx
#endif
	movl	raw_closure_T_FS-4(%esp), %ebx		/* restore %ebx */
L(UW47):
	# cfi_restore(%ebx)
	movl	16(%esp), %eax				/* Optimistic load */
	jmp	*%ecx

	.balign	8
L(load_table5):
E(L(load_table5), X86_RET_FLOAT)
	flds	16(%esp)
	jmp	L(e5)
E(L(load_table5), X86_RET_DOUBLE)
	fldl	16(%esp)
	jmp	L(e5)
E(L(load_table5), X86_RET_LDOUBLE)
	fldt	16(%esp)
	jmp	L(e5)
E(L(load_table5), X86_RET_SINT8)
	movsbl	%al, %eax
	jmp	L(e5)
E(L(load_table5), X86_RET_SINT16)
	movswl	%ax, %eax
	jmp	L(e5)
E(L(load_table5), X86_RET_UINT8)
	movzbl	%al, %eax
	jmp	L(e5)
E(L(load_table5), X86_RET_UINT16)
	movzwl	%ax, %eax
	jmp	L(e5)
E(L(load_table5), X86_RET_INT64)
	movl	16+4(%esp), %edx
	jmp	L(e5)
E(L(load_table5), X86_RET_INT32)
	nop
	/* fallthru */
E(L(load_table5), X86_RET_VOID)
L(e5):
	addl	$raw_closure_T_FS, %esp
L(UW48):
	# cfi_adjust_cfa_offset(-raw_closure_T_FS)
	/* Remove the extra %ecx argument we pushed.  */
	ret	$4
L(UW49):
	# cfi_adjust_cfa_offset(raw_closure_T_FS)
E(L(load_table5), X86_RET_STRUCTPOP)
	addl	$raw_closure_T_FS, %esp
L(UW50):
	# cfi_adjust_cfa_offset(-raw_closure_T_FS)
	ret	$8
L(UW51):
	# cfi_adjust_cfa_offset(raw_closure_T_FS)
E(L(load_table5), X86_RET_STRUCTARG)
	jmp	L(e5)
E(L(load_table5), X86_RET_STRUCT_1B)
	movzbl	%al, %eax
	jmp	L(e5)
E(L(load_table5), X86_RET_STRUCT_2B)
	movzwl	%ax, %eax
	jmp	L(e5)

	/* Fill out the table so that bad values are predictable.  */
E(L(load_table5), X86_RET_UNUSED14)
	ud2
E(L(load_table5), X86_RET_UNUSED15)
	ud2

L(UW52):
	# cfi_endproc
ENDF(C(ffi_closure_raw_THISCALL))

#endif /* !FFI_NO_RAW_API */

/* COMDAT(X): place X in a section that the linker will deduplicate
   across objects, where the object format supports it.  */
#ifdef X86_DARWIN
# define COMDAT(X)							\
        .section __TEXT,__text,coalesced,pure_instructions;		\
        .weak_definition X;						\
        FFI_HIDDEN(X)
#elif defined __ELF__ && !(defined(__sun__) && defined(__svr4__))
# define COMDAT(X)							\
	.section .text.X,"axG",@progbits,X,comdat;			\
	.globl	X;							\
	FFI_HIDDEN(X)
#else
# define COMDAT(X)
#endif

/* PC thunks: return the caller's return address (i.e. the PC of the
   instruction after the call) in %ebx / %edx, for PIC addressing.  */
#if defined(__PIC__)
	COMDAT(C(__x86.get_pc_thunk.bx))
C(__x86.get_pc_thunk.bx):
	movl	(%esp), %ebx
	ret
ENDF(C(__x86.get_pc_thunk.bx))
# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
	COMDAT(C(__x86.get_pc_thunk.dx))
C(__x86.get_pc_thunk.dx):
	movl	(%esp), %edx
	ret
ENDF(C(__x86.get_pc_thunk.dx))
#endif /* DARWIN || HIDDEN */
#endif /* __PIC__ */

/* Sadly, OSX cctools-as doesn't understand .cfi directives at all.
   The CIE/FDE tables below are therefore hand-encoded; each FDE
   mirrors the cfi_* marker comments in the corresponding function,
   keyed by the L(UWnn) labels.  */

#ifdef __APPLE__
.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
EHFrame0:
#elif defined(X86_WIN32)
.section .eh_frame,"r"
#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE)
.section .eh_frame,EH_FRAME_FLAGS,@unwind
#else
.section .eh_frame,EH_FRAME_FLAGS,@progbits
#endif

#ifdef HAVE_AS_X86_PCREL
# define PCREL(X)	X - .
#else
# define PCREL(X)	X@rel
#endif

/* Simplify advancing between labels.  Assume DW_CFA_advance_loc1 fits.  */
#define ADV(N, P)	.byte 2, L(N)-L(P)

	.balign 4
L(CIE):
	.set	L(set0),L(ECIE)-L(SCIE)
	.long	L(set0)			/* CIE Length */
L(SCIE):
	.long	0			/* CIE Identifier Tag */
	.byte	1			/* CIE Version */
	.ascii	"zR\0"			/* CIE Augmentation */
	.byte	1			/* CIE Code Alignment Factor */
	.byte	0x7c			/* CIE Data Alignment Factor */
	.byte	0x8			/* CIE RA Column */
	.byte	1			/* Augmentation size */
	.byte	0x1b			/* FDE Encoding (pcrel sdata4) */
	.byte	0xc, 4, 4		/* DW_CFA_def_cfa, %esp offset 4 */
	.byte	0x80+8, 1		/* DW_CFA_offset, %eip offset 1*-4 */
	.balign 4
L(ECIE):

	/* FDE for ffi_call_i386.  */
	.set	L(set1),L(EFDE1)-L(SFDE1)
	.long	L(set1)			/* FDE Length */
L(SFDE1):
	.long	L(SFDE1)-L(CIE)		/* FDE CIE offset */
	.long	PCREL(L(UW0))		/* Initial location */
	.long	L(UW5)-L(UW0)		/* Address range */
	.byte	0			/* Augmentation size */
	ADV(UW1, UW0)
	.byte	0xc, 5, 8		/* DW_CFA_def_cfa, %ebp 8 */
	.byte	0x80+5, 2		/* DW_CFA_offset, %ebp 2*-4 */
	ADV(UW2, UW1)
	.byte	0x80+3, 0		/* DW_CFA_offset, %ebx 0*-4 */
	ADV(UW3, UW2)
	.byte	0xa			/* DW_CFA_remember_state */
	.byte	0xc, 4, 4		/* DW_CFA_def_cfa, %esp 4 */
	.byte	0xc0+3			/* DW_CFA_restore, %ebx */
	.byte	0xc0+5			/* DW_CFA_restore, %ebp */
	ADV(UW4, UW3)
	.byte	0xb			/* DW_CFA_restore_state */
	.balign	4
L(EFDE1):

	/* FDE for ffi_go_closure_EAX.  */
	.set	L(set2),L(EFDE2)-L(SFDE2)
	.long	L(set2)			/* FDE Length */
L(SFDE2):
	.long	L(SFDE2)-L(CIE)		/* FDE CIE offset */
	.long	PCREL(L(UW6))		/* Initial location */
	.long	L(UW8)-L(UW6)		/* Address range */
	.byte	0			/* Augmentation size */
	ADV(UW7, UW6)
	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
	.balign	4
L(EFDE2):

	/* FDE for ffi_go_closure_ECX.  */
	.set	L(set3),L(EFDE3)-L(SFDE3)
	.long	L(set3)			/* FDE Length */
L(SFDE3):
	.long	L(SFDE3)-L(CIE)		/* FDE CIE offset */
	.long	PCREL(L(UW9))		/* Initial location */
	.long	L(UW11)-L(UW9)		/* Address range */
	.byte	0			/* Augmentation size */
	ADV(UW10, UW9)
	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
	.balign	4
L(EFDE3):

	/* FDE for ffi_closure_i386.  */
	.set	L(set4),L(EFDE4)-L(SFDE4)
	.long	L(set4)			/* FDE Length */
L(SFDE4):
	.long	L(SFDE4)-L(CIE)		/* FDE CIE offset */
	.long	PCREL(L(UW12))		/* Initial location */
	.long	L(UW20)-L(UW12)		/* Address range */
	.byte	0			/* Augmentation size */
	ADV(UW13, UW12)
	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
	ADV(UW14, UW13)
	.byte	0x80+3, (40-(closure_FS+4))/-4  /* DW_CFA_offset %ebx */
	ADV(UW15, UW14)
	.byte	0xc0+3			/* DW_CFA_restore %ebx */
	ADV(UW16, UW15)
#else
	ADV(UW16, UW13)
#endif
	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
	ADV(UW17, UW16)
	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
	ADV(UW18, UW17)
	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
	ADV(UW19, UW18)
	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
	.balign	4
L(EFDE4):

	/* FDE for ffi_go_closure_STDCALL.  */
	.set	L(set5),L(EFDE5)-L(SFDE5)
	.long	L(set5)			/* FDE Length */
L(SFDE5):
	.long	L(SFDE5)-L(CIE)		/* FDE CIE offset */
	.long	PCREL(L(UW21))		/* Initial location */
	.long	L(UW23)-L(UW21)		/* Address range */
	.byte	0			/* Augmentation size */
	ADV(UW22, UW21)
	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
	.balign	4
L(EFDE5):

	/* FDE for ffi_closure_REGISTER.  */
	.set	L(set6),L(EFDE6)-L(SFDE6)
	.long	L(set6)			/* FDE Length */
L(SFDE6):
	.long	L(SFDE6)-L(CIE)		/* FDE CIE offset */
	.long	PCREL(L(UW24))		/* Initial location */
	.long	L(UW26)-L(UW24)		/* Address range */
	.byte	0			/* Augmentation size */
	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
	.byte	0x80+8, 2		/* DW_CFA_offset %eip, 2*-4 */
	ADV(UW25, UW24)
	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
	.balign	4
L(EFDE6):

	/* FDE for ffi_closure_STDCALL.  */
	.set	L(set7),L(EFDE7)-L(SFDE7)
	.long	L(set7)			/* FDE Length */
L(SFDE7):
	.long	L(SFDE7)-L(CIE)		/* FDE CIE offset */
	.long	PCREL(L(UW27))		/* Initial location */
	.long	L(UW31)-L(UW27)		/* Address range */
	.byte	0			/* Augmentation size */
	ADV(UW28, UW27)
	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
	ADV(UW29, UW28)
	.byte	0x80+3, (40-(closure_FS+4))/-4  /* DW_CFA_offset %ebx */
	ADV(UW30, UW29)
	.byte	0xc0+3			/* DW_CFA_restore %ebx */
#endif
	.balign	4
L(EFDE7):

#if !FFI_NO_RAW_API
	/* FDE for ffi_closure_raw_SYSV.  */
	.set	L(set8),L(EFDE8)-L(SFDE8)
	.long	L(set8)			/* FDE Length */
L(SFDE8):
	.long	L(SFDE8)-L(CIE)		/* FDE CIE offset */
	.long	PCREL(L(UW32))		/* Initial location */
	.long	L(UW40)-L(UW32)		/* Address range */
	.byte	0			/* Augmentation size */
	ADV(UW33, UW32)
	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */
	ADV(UW34, UW33)
	.byte	0x80+3, 2		/* DW_CFA_offset %ebx 2*-4 */
	ADV(UW35, UW34)
	.byte	0xc0+3			/* DW_CFA_restore %ebx */
	ADV(UW36, UW35)
	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
	ADV(UW37, UW36)
	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */
	ADV(UW38, UW37)
	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
	ADV(UW39, UW38)
	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */
	.balign	4
L(EFDE8):

	/* FDE for ffi_closure_raw_THISCALL.  */
	.set	L(set9),L(EFDE9)-L(SFDE9)
	.long	L(set9)			/* FDE Length */
L(SFDE9):
	.long	L(SFDE9)-L(CIE)		/* FDE CIE offset */
	.long	PCREL(L(UW41))		/* Initial location */
	.long	L(UW52)-L(UW41)		/* Address range */
	.byte	0			/* Augmentation size */
	ADV(UW42, UW41)
	.byte	0xe, 0			/* DW_CFA_def_cfa_offset */
	.byte	0x9, 8, 2		/* DW_CFA_register %eip, %edx */
	ADV(UW43, UW42)
	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
	ADV(UW44, UW43)
	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
	.byte	0x80+8, 2		/* DW_CFA_offset %eip 2*-4 */
	ADV(UW45, UW44)
	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */
	ADV(UW46, UW45)
	.byte	0x80+3, 3		/* DW_CFA_offset %ebx 3*-4 */
	ADV(UW47, UW46)
	.byte	0xc0+3			/* DW_CFA_restore %ebx */
	ADV(UW48, UW47)
	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
	ADV(UW49, UW48)
	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */
	ADV(UW50, UW49)
	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
	ADV(UW51, UW50)
	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */
	.balign	4
L(EFDE9):
#endif /* !FFI_NO_RAW_API */

#ifdef _WIN32
	/* NOTE(review): @feat.00 = 1 appears to advertise MSVC feature
	   (SafeSEH) compatibility to the linker — confirm against the
	   PE/COFF spec before changing.  */
	.def	 @feat.00;
	.scl	3;
	.type	0;
	.endef
	.globl	@feat.00
@feat.00 = 1
#endif

1047#ifdef __APPLE__
1048    .subsections_via_symbols
1049    .section __LD,__compact_unwind,regular,debug
1050
1051    /* compact unwind for ffi_call_i386 */
1052    .long    C(ffi_call_i386)
1053    .set     L1,L(UW5)-L(UW0)
1054    .long    L1
1055    .long    0x04000000 /* use dwarf unwind info */
1056    .long    0
1057    .long    0
1058
1059    /* compact unwind for ffi_go_closure_EAX */
1060    .long    C(ffi_go_closure_EAX)
1061    .set     L2,L(UW8)-L(UW6)
1062    .long    L2
1063    .long    0x04000000 /* use dwarf unwind info */
1064    .long    0
1065    .long    0
1066
1067    /* compact unwind for ffi_go_closure_ECX */
1068    .long    C(ffi_go_closure_ECX)
1069    .set     L3,L(UW11)-L(UW9)
1070    .long    L3
1071    .long    0x04000000 /* use dwarf unwind info */
1072    .long    0
1073    .long    0
1074
1075    /* compact unwind for ffi_closure_i386 */
1076    .long    C(ffi_closure_i386)
1077    .set     L4,L(UW20)-L(UW12)
1078    .long    L4
1079    .long    0x04000000 /* use dwarf unwind info */
1080    .long    0
1081    .long    0
1082
1083    /* compact unwind for ffi_go_closure_STDCALL */
1084    .long    C(ffi_go_closure_STDCALL)
1085    .set     L5,L(UW23)-L(UW21)
1086    .long    L5
1087    .long    0x04000000 /* use dwarf unwind info */
1088    .long    0
1089    .long    0
1090
1091    /* compact unwind for ffi_closure_REGISTER */
1092    .long    C(ffi_closure_REGISTER)
1093    .set     L6,L(UW26)-L(UW24)
1094    .long    L6
1095    .long    0x04000000 /* use dwarf unwind info */
1096    .long    0
1097    .long    0
1098
1099    /* compact unwind for ffi_closure_STDCALL */
1100    .long    C(ffi_closure_STDCALL)
1101    .set     L7,L(UW31)-L(UW27)
1102    .long    L7
1103    .long    0x04000000 /* use dwarf unwind info */
1104    .long    0
1105    .long    0
1106
1107    /* compact unwind for ffi_closure_raw_SYSV */
1108    .long    C(ffi_closure_raw_SYSV)
1109    .set     L8,L(UW40)-L(UW32)
1110    .long    L8
1111    .long    0x04000000 /* use dwarf unwind info */
1112    .long    0
1113    .long    0
1114
1115    /* compact unwind for ffi_closure_raw_THISCALL */
1116    .long    C(ffi_closure_raw_THISCALL)
1117    .set     L9,L(UW52)-L(UW41)
1118    .long    L9
1119    .long    0x04000000 /* use dwarf unwind info */
1120    .long    0
1121    .long    0
1122#endif /* __APPLE__ */
1123
#endif /* ifndef _MSC_VER */
#endif /* ifdef __i386__ */

/* Mark the stack as non-executable on Linux/ELF.  */
#if defined __ELF__ && defined __linux__
	.section	.note.GNU-stack,"",@progbits
#endif