/* -----------------------------------------------------------------------
   darwin64.S - Copyright (c) 2006 Free Software Foundation, Inc.
                Copyright (c) 2008 Red Hat, Inc.
   derived from unix64.S

   x86-64 Foreign Function Interface for Darwin.

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   ``Software''), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
   OTHER DEALINGS IN THE SOFTWARE.
   ----------------------------------------------------------------------- */

#ifdef __x86_64__
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>

	.file "darwin64.S"
.text

/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
		    void *raddr, void (*fnaddr)(void), <sse count>);

   Syntax: AT&T (GNU as).  Incoming registers as used below:
     %rdi = args    base of the register-load area (6 integer slots at
		    offsets 0..40, 8 SSE slots of 16 bytes at 48..160,
		    176 bytes in total, stack arguments follow)
     %rsi = bytes   ARGS+BYTES is where the local frame is built
     %rdx = flags   low byte selects the Lstore_table entry; for structs
		    bits 8-10 pick the return registers and bits 12-31
		    hold the structure size
     %rcx = raddr   address that receives the return value
     %r8  = fnaddr  function to call
     %r9d = number of SSE registers; zero skips loading %xmm0-%xmm7,
	    and the value is handed to the callee in %eax

   A bit of trickiness here -- ARGS+BYTES is the base of the stack frame
   for this function.  This has been allocated by ffi_call.  We also
   deallocate some of the stack that has been alloca'd.

   Local frame layout, relative to %rbp once it is set up:
      0(%rbp)  flags
      8(%rbp)  raddr
     16(%rbp)  caller's %rbp
     24(%rbp)  relocated return address  */

	.align 3
	.globl _ffi_call_unix64

_ffi_call_unix64:
LUW0:
	movq	(%rsp), %r10		/* Load return address.  */
	leaq	(%rdi, %rsi), %rax	/* Find local stack base.  */
	movq	%rdx, (%rax)		/* Save flags.  */
	movq	%rcx, 8(%rax)		/* Save raddr.  */
	movq	%rbp, 16(%rax)		/* Save old frame pointer.  */
	movq	%r10, 24(%rax)		/* Relocate return address.  */
	movq	%rax, %rbp		/* Finalize local stack frame.  */
LUW1:
	movq	%rdi, %r10		/* Save a copy of the register area. */
	movq	%r8, %r11		/* Save a copy of the target fn.  */
	movl	%r9d, %eax		/* Set number of SSE registers.  */

	/* Load up all argument registers.  */
	movq	(%r10), %rdi
	movq	8(%r10), %rsi
	movq	16(%r10), %rdx
	movq	24(%r10), %rcx
	movq	32(%r10), %r8
	movq	40(%r10), %r9
	testl	%eax, %eax
	jnz	Lload_sse
Lret_from_load_sse:

	/* Deallocate the reg arg area.  */
	leaq	176(%r10), %rsp

	/* Call the user function.  */
	call	*%r11

	/* Deallocate stack arg area; local stack frame in redzone.
	   %rsp now points at the relocated return address, so the `ret'
	   in each store handler below returns to our caller.  */
	leaq	24(%rbp), %rsp

	movq	0(%rbp), %rcx		/* Reload flags.  */
	movq	8(%rbp), %rdi		/* Reload raddr.  */
	movq	16(%rbp), %rbp		/* Reload old frame pointer.  */
LUW2:

	/* The first byte of the flags contains the FFI_TYPE.  The table
	   holds 32-bit offsets relative to Lstore_table itself.  */
	movzbl	%cl, %r10d
	leaq	Lstore_table(%rip), %r11
	movslq	(%r11, %r10, 4), %r10
	addq	%r11, %r10
	jmp	*%r10

Lstore_table:
	.long	Lst_void-Lstore_table		/* FFI_TYPE_VOID */
	.long	Lst_sint32-Lstore_table		/* FFI_TYPE_INT */
	.long	Lst_float-Lstore_table		/* FFI_TYPE_FLOAT */
	.long	Lst_double-Lstore_table		/* FFI_TYPE_DOUBLE */
	.long	Lst_ldouble-Lstore_table	/* FFI_TYPE_LONGDOUBLE */
	.long	Lst_uint8-Lstore_table		/* FFI_TYPE_UINT8 */
	.long	Lst_sint8-Lstore_table		/* FFI_TYPE_SINT8 */
	.long	Lst_uint16-Lstore_table		/* FFI_TYPE_UINT16 */
	.long	Lst_sint16-Lstore_table		/* FFI_TYPE_SINT16 */
	.long	Lst_uint32-Lstore_table		/* FFI_TYPE_UINT32 */
	.long	Lst_sint32-Lstore_table		/* FFI_TYPE_SINT32 */
	.long	Lst_int64-Lstore_table		/* FFI_TYPE_UINT64 */
	.long	Lst_int64-Lstore_table		/* FFI_TYPE_SINT64 */
	.long	Lst_struct-Lstore_table		/* FFI_TYPE_STRUCT */
	.long	Lst_int64-Lstore_table		/* FFI_TYPE_POINTER */

	/* Store handlers.  On entry %rdi = raddr, %rcx = flags and the
	   callee's result is in %rax/%rdx, %xmm0/%xmm1 or %st(0).
	   Integer results narrower than 64 bits are widened and stored
	   as a full 64-bit word.  Every handler ends in its own `ret'.  */
	.text
	.align 3
Lst_void:
	ret
	.align 3
Lst_uint8:
	movzbq	%al, %rax
	movq	%rax, (%rdi)
	ret
	.align 3
Lst_sint8:
	movsbq	%al, %rax
	movq	%rax, (%rdi)
	ret
	.align 3
Lst_uint16:
	movzwq	%ax, %rax
	movq	%rax, (%rdi)
	/* Must return here: falling into Lst_sint16 would overwrite the
	   zero-extended value with a sign-extended one.  */
	ret
	.align 3
Lst_sint16:
	movswq	%ax, %rax
	movq	%rax, (%rdi)
	ret
	.align 3
Lst_uint32:
	movl	%eax, %eax		/* 32-bit move zero-extends to %rax. */
	movq	%rax, (%rdi)
	/* Must return here: falling into Lst_sint32 would overwrite the
	   zero-extended value with a sign-extended one.  */
	ret
	.align 3
Lst_sint32:
	cltq
	movq	%rax, (%rdi)
	ret
	.align 3
Lst_int64:
	movq	%rax, (%rdi)
	ret
	.align 3
Lst_float:
	movss	%xmm0, (%rdi)
	ret
	.align 3
Lst_double:
	movsd	%xmm0, (%rdi)
	ret
	.align 3
Lst_ldouble:
	fstpt	(%rdi)
	ret
	.align 3
Lst_struct:
	leaq	-20(%rsp), %rsi		/* Scratch area in redzone.  */

	/* We have to locate the values now, and since we don't want to
	   write too much data into the user's return value, we spill the
	   value to a 16 byte scratch area first.  Bits 8, 9, and 10
	   control where the values are located.  Only one of the three
	   bits will be set; see ffi_prep_cif_machdep for the pattern.
	     bit 8:  first word from %xmm0, second word from %rax
	     bit 9:  first word from %rax,  second word from %xmm0
	     bit 10: first word from %xmm0, second word from %xmm1
	     none:   first word from %rax,  second word from %rdx  */
	movd	%xmm0, %r10
	movd	%xmm1, %r11
	testl	$0x100, %ecx
	cmovnz	%rax, %rdx
	cmovnz	%r10, %rax
	testl	$0x200, %ecx
	cmovnz	%r10, %rdx
	testl	$0x400, %ecx
	cmovnz	%r10, %rax
	cmovnz	%r11, %rdx
	movq	%rax, (%rsi)
	movq	%rdx, 8(%rsi)

	/* Bits 12-31 contain the true size of the structure.  Copy from
	   the scratch area to the true destination.  */
	shrl	$12, %ecx
	rep movsb
	ret

	/* Many times we can avoid loading any SSE registers at all.
	   It's not worth an indirect jump to load the exact set of
	   SSE registers needed; zero or all is a good compromise.  */
	.align 3
LUW3:
Lload_sse:
	movdqa	48(%r10), %xmm0
	movdqa	64(%r10), %xmm1
	movdqa	80(%r10), %xmm2
	movdqa	96(%r10), %xmm3
	movdqa	112(%r10), %xmm4
	movdqa	128(%r10), %xmm5
	movdqa	144(%r10), %xmm6
	movdqa	160(%r10), %xmm7
	jmp	Lret_from_load_sse

LUW4:
/* ffi_closure_unix64

   Entry point reached from a closure trampoline.  It spills the six
   integer argument registers (and, when the carry flag is set, %xmm0
   through %xmm7) into a 200 byte frame, calls
   ffi_closure_unix64_inner, and then loads the return registers
   according to the value that call returns in %eax.

   Frame layout after the allocation below:
       0(%rsp) ..  47(%rsp)  %rdi, %rsi, %rdx, %rcx, %r8, %r9
      48(%rsp) .. 175(%rsp)  %xmm0 .. %xmm7 (16 bytes each)
     176(%rsp) .. 199(%rsp)  return value buffer
     200(%rsp)               return address
     208(%rsp)               first byte above the return address  */

	.align 3
	.globl _ffi_closure_unix64

_ffi_closure_unix64:
LUW5:
	/* The carry flag is set by the trampoline iff SSE registers
	   are used.  Don't clobber it before the branch instruction.
	   (leaq and movq leave the flags untouched.)  */
	leaq	-200(%rsp), %rsp
LUW6:
	movq	%rdi, (%rsp)
	movq	%rsi, 8(%rsp)
	movq	%rdx, 16(%rsp)
	movq	%rcx, 24(%rsp)
	movq	%r8, 32(%rsp)
	movq	%r9, 40(%rsp)
	jc	Lsave_sse
Lret_from_save_sse:

	/* Arguments for ffi_closure_unix64_inner.  */
	movq	%r10, %rdi		/* Value left in %r10 by the caller. */
	leaq	176(%rsp), %rsi		/* Return value buffer.  */
	movq	%rsp, %rdx		/* Saved register area.  */
	leaq	208(%rsp), %rcx		/* Just above the return address.  */
	call	_ffi_closure_unix64_inner

	/* Deallocate stack frame early; return value is now in redzone.
	   The buffer that was at 176(%rsp) is now at -24(%rsp).  */
	addq	$200, %rsp
LUW7:

	/* The first byte of the return value contains the FFI_TYPE.  The
	   table holds 32-bit offsets relative to Lload_table itself.  */
	movzbl	%al, %r10d
	leaq	Lload_table(%rip), %r11
	movslq	(%r11, %r10, 4), %r10
	addq	%r11, %r10
	jmp	*%r10

Lload_table:
	.long	Lld_void-Lload_table		/* FFI_TYPE_VOID */
	.long	Lld_int32-Lload_table		/* FFI_TYPE_INT */
	.long	Lld_float-Lload_table		/* FFI_TYPE_FLOAT */
	.long	Lld_double-Lload_table		/* FFI_TYPE_DOUBLE */
	.long	Lld_ldouble-Lload_table		/* FFI_TYPE_LONGDOUBLE */
	.long	Lld_int8-Lload_table		/* FFI_TYPE_UINT8 */
	.long	Lld_int8-Lload_table		/* FFI_TYPE_SINT8 */
	.long	Lld_int16-Lload_table		/* FFI_TYPE_UINT16 */
	.long	Lld_int16-Lload_table		/* FFI_TYPE_SINT16 */
	.long	Lld_int32-Lload_table		/* FFI_TYPE_UINT32 */
	.long	Lld_int32-Lload_table		/* FFI_TYPE_SINT32 */
	.long	Lld_int64-Lload_table		/* FFI_TYPE_UINT64 */
	.long	Lld_int64-Lload_table		/* FFI_TYPE_SINT64 */
	.long	Lld_struct-Lload_table		/* FFI_TYPE_STRUCT */
	.long	Lld_int64-Lload_table		/* FFI_TYPE_POINTER */

	/* Load handlers.  Each one moves the value at -24(%rsp) into the
	   appropriate return register(s) and returns.  */
	.text
	.align 3
Lld_void:
	ret
	.align 3
Lld_int8:
	movzbl	-24(%rsp), %eax
	ret
	.align 3
Lld_int16:
	movzwl	-24(%rsp), %eax
	ret
	.align 3
Lld_int32:
	movl	-24(%rsp), %eax
	ret
	.align 3
Lld_int64:
	movq	-24(%rsp), %rax
	ret
	.align 3
Lld_float:
	movss	-24(%rsp), %xmm0
	ret
	.align 3
Lld_double:
	movsd	-24(%rsp), %xmm0
	ret
	.align 3
Lld_ldouble:
	fldt	-24(%rsp)
	ret
	.align 3
Lld_struct:
	/* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
	   %rax/%xmm0, %xmm0/%xmm1.  We collapse two by always loading
	   both rdx and xmm1 with the second word.  For the remaining,
	   bit 8 set means xmm0 gets the second word, and bit 9 means
	   that rax gets the second word.  */
	movq	-24(%rsp), %rcx
	movq	-16(%rsp), %rdx
	movq	-16(%rsp), %xmm1
	testl	$0x100, %eax
	cmovnz	%rdx, %rcx
	movd	%rcx, %xmm0
	testl	$0x200, %eax
	movq	-24(%rsp), %rax		/* movq does not disturb the flags. */
	cmovnz	%rdx, %rax
	ret

	/* See the comment above Lload_sse; the same logic applies here.  */
	.align 3
LUW8:
Lsave_sse:
	movdqa	%xmm0, 48(%rsp)
	movdqa	%xmm1, 64(%rsp)
	movdqa	%xmm2, 80(%rsp)
	movdqa	%xmm3, 96(%rsp)
	movdqa	%xmm4, 112(%rsp)
	movdqa	%xmm5, 128(%rsp)
	movdqa	%xmm6, 144(%rsp)
	movdqa	%xmm7, 160(%rsp)
	jmp	Lret_from_save_sse

LUW9:
.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
EH_frame1:
	.set	L$set$0,LECIE1-LSCIE1	/* CIE Length */
	.long	L$set$0
LSCIE1:
	.long	0x0		/* CIE Identifier Tag */
	.byte	0x1		/* CIE Version */
	.ascii	"zR\0"		/* CIE Augmentation */
	.byte	0x1		/* uleb128 0x1; CIE Code Alignment Factor */
	.byte	0x78		/* sleb128 -8; CIE Data Alignment Factor */
	.byte	0x10		/* CIE RA Column */
	.byte	0x1		/* uleb128 0x1; Augmentation size */
	.byte	0x10		/* FDE Encoding (pcrel absptr; the FDEs use .quad) */
	.byte	0xc		/* DW_CFA_def_cfa, %rsp offset 8 */
	.byte	0x7		/* uleb128 0x7 */
	.byte	0x8		/* uleb128 0x8 */
	.byte	0x90		/* DW_CFA_offset, column 0x10 */
	.byte	0x1
	.align 3
LECIE1:
	.globl _ffi_call_unix64.eh
_ffi_call_unix64.eh:
LSFDE1:
	.set	L$set$1,LEFDE1-LASFDE1	/* FDE Length */
	.long	L$set$1
LASFDE1:
	.long	LASFDE1-EH_frame1	/* FDE CIE offset */
	.quad	LUW0-.			/* FDE initial location */
	.set	L$set$2,LUW4-LUW0	/* FDE address range */
	.quad	L$set$2
	.byte	0x0			/* Augmentation size */
	.byte	0x4			/* DW_CFA_advance_loc4 */
	.set	L$set$3,LUW1-LUW0
	.long	L$set$3

	/* New stack frame based off rbp.  This is an itty bit of unwind
	   trickery in that the CFA *has* changed.  There is no easy way
	   to describe it correctly on entry to the function.  Fortunately,
	   it doesn't matter too much since at all points we can correctly
	   unwind back to ffi_call.  Note that the location to which we
	   moved the return address is (the new) CFA-8, so from the
	   perspective of the unwind info, it hasn't moved.  */
	.byte	0xc			/* DW_CFA_def_cfa, %rbp offset 32 */
	.byte	0x6
	.byte	0x20
	.byte	0x80+6			/* DW_CFA_offset, %rbp offset 2*-8 */
	.byte	0x2
	.byte	0xa			/* DW_CFA_remember_state */

	.byte	0x4			/* DW_CFA_advance_loc4 */
	.set	L$set$4,LUW2-LUW1
	.long	L$set$4
	.byte	0xc			/* DW_CFA_def_cfa, %rsp offset 8 */
	.byte	0x7
	.byte	0x8
	.byte	0xc0+6			/* DW_CFA_restore, %rbp */

	.byte	0x4			/* DW_CFA_advance_loc4 */
	.set	L$set$5,LUW3-LUW2
	.long	L$set$5
	.byte	0xb			/* DW_CFA_restore_state */

	.align 3
LEFDE1:
	.globl _ffi_closure_unix64.eh
_ffi_closure_unix64.eh:
LSFDE3:
	.set	L$set$6,LEFDE3-LASFDE3	/* FDE Length */
	.long	L$set$6
LASFDE3:
	.long	LASFDE3-EH_frame1	/* FDE CIE offset */
	.quad	LUW5-.			/* FDE initial location */
	.set	L$set$7,LUW9-LUW5	/* FDE address range */
	.quad	L$set$7
	.byte	0x0			/* Augmentation size */

	.byte	0x4			/* DW_CFA_advance_loc4 */
	.set	L$set$8,LUW6-LUW5
	.long	L$set$8
	.byte	0xe			/* DW_CFA_def_cfa_offset */
	.byte	208,1			/* uleb128 208 */
	.byte	0xa			/* DW_CFA_remember_state */

	.byte	0x4			/* DW_CFA_advance_loc4 */
	.set	L$set$9,LUW7-LUW6
	.long	L$set$9
	.byte	0xe			/* DW_CFA_def_cfa_offset */
	.byte	0x8

	.byte	0x4			/* DW_CFA_advance_loc4 */
	.set	L$set$10,LUW8-LUW7
	.long	L$set$10
	.byte	0xb			/* DW_CFA_restore_state */

	.align 3
LEFDE3:
	.subsections_via_symbols

#endif /* __x86_64__ */