/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
``Software''), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */

#if defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fficonfig.h>
#include <ffi.h>
#include <ffi_common.h>
#include "internal.h"
#ifdef _M_ARM64
#include <windows.h> /* FlushInstructionCache */
#endif

/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
   all further uses in this file will refer to the 128-bit type.  */
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
# if FFI_TYPE_LONGDOUBLE != 4
#  error FFI_TYPE_LONGDOUBLE out of date
# endif
#else
# undef FFI_TYPE_LONGDOUBLE
# define FFI_TYPE_LONGDOUBLE 4
#endif
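
/* Forcing FFI_TYPE_LONGDOUBLE to 4 keeps the float/double/long-double type
   codes consecutive (2, 3, 4), which is what lets is_vfp_type below encode
   an HFA as TYPE * 4 + (4 - COUNT) and land exactly on the AARCH64_RET_S*,
   _D* and _Q* constants from internal.h.  */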

union _d
{
  UINT64 d;
  UINT32 s[2];
};

struct _v
{
  union _d d[2] __attribute__((aligned(16)));
};

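/* Mirrors the register-save area laid out by the assembly (sysv.S): the
   eight 128-bit vector argument registers v0-v7 first, then the integer
   argument registers x0-x7.  */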
struct call_context
{
  struct _v v[N_V_ARG_REG];
  UINT64 x[N_X_ARG_REG];
};

#if FFI_EXEC_TRAMPOLINE_TABLE

#ifdef __MACH__
#include <mach/vm_param.h>
#endif

#else

#if defined (__clang__) && defined (__APPLE__)
extern void sys_icache_invalidate (void *start, size_t len);
#endif

static inline void
ffi_clear_cache (void *start, void *end)
{
#if defined (__clang__) && defined (__APPLE__)
  sys_icache_invalidate (start, (char *)end - (char *)start);
#elif defined (__GNUC__)
  __builtin___clear_cache (start, end);
#elif defined (_M_ARM64)
  FlushInstructionCache (GetCurrentProcess (), start, (char *)end - (char *)start);
#else
#error "Missing builtin to flush instruction cache"
#endif
}

#endif

/* A subroutine of is_vfp_type.  Given a structure type, return the type code
   of the first non-structure element.  Recurse for structure elements.
   Return -1 if the structure is in fact empty, i.e. no nested elements.  */

static int
is_hfa0 (const ffi_type *ty)
{
  ffi_type **elements = ty->elements;
  int i, ret = -1;

  if (elements != NULL)
    for (i = 0; elements[i]; ++i)
      {
        ret = elements[i]->type;
        if (ret == FFI_TYPE_STRUCT || ret == FFI_TYPE_COMPLEX)
          {
            ret = is_hfa0 (elements[i]);
            if (ret < 0)
              continue;
          }
        break;
      }

  return ret;
}

/* A subroutine of is_vfp_type.  Given a structure type, return true if all
   of the non-structure elements are the same as CANDIDATE.  */

static int
is_hfa1 (const ffi_type *ty, int candidate)
{
  ffi_type **elements = ty->elements;
  int i;

  if (elements != NULL)
    for (i = 0; elements[i]; ++i)
      {
        int t = elements[i]->type;
        if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
          {
            if (!is_hfa1 (elements[i], candidate))
              return 0;
          }
        else if (t != candidate)
          return 0;
      }

  return 1;
}

/* Determine if TY may be allocated to the FP registers.  This is both an
   fp scalar type as well as a homogeneous floating point aggregate (HFA).
   That is, a structure consisting of 1 to 4 members of all the same type,
   where that type is an fp scalar.

   Returns non-zero iff TY is an HFA.  The result is the AARCH64_RET_*
   constant for the type.  */

static int
is_vfp_type (const ffi_type *ty)
{
  ffi_type **elements;
  int candidate, i;
  size_t size, ele_count;

  /* Quickest tests first.  */
  candidate = ty->type;
  switch (candidate)
    {
    default:
      return 0;
    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
    case FFI_TYPE_LONGDOUBLE:
      ele_count = 1;
      goto done;
    case FFI_TYPE_COMPLEX:
      candidate = ty->elements[0]->type;
      switch (candidate)
        {
        case FFI_TYPE_FLOAT:
        case FFI_TYPE_DOUBLE:
        case FFI_TYPE_LONGDOUBLE:
          ele_count = 2;
          goto done;
        }
      return 0;
    case FFI_TYPE_STRUCT:
      break;
    }

  /* No HFA types are smaller than 4 bytes, or larger than 64 bytes.  */
  size = ty->size;
  if (size < 4 || size > 64)
    return 0;

  /* Find the type of the first non-structure member.  */
  elements = ty->elements;
  candidate = elements[0]->type;
  if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX)
    {
      for (i = 0; ; ++i)
        {
          candidate = is_hfa0 (elements[i]);
          if (candidate >= 0)
            break;
        }
    }

  /* If the first member is not a floating point type, it's not an HFA.
     Also quickly re-check the size of the structure.  */
  switch (candidate)
    {
    case FFI_TYPE_FLOAT:
      ele_count = size / sizeof(float);
      if (size != ele_count * sizeof(float))
        return 0;
      break;
    case FFI_TYPE_DOUBLE:
      ele_count = size / sizeof(double);
      if (size != ele_count * sizeof(double))
        return 0;
      break;
    case FFI_TYPE_LONGDOUBLE:
      ele_count = size / sizeof(long double);
      if (size != ele_count * sizeof(long double))
        return 0;
      break;
    default:
      return 0;
    }
  if (ele_count > 4)
    return 0;

  /* Finally, make sure that all scalar elements are the same type.  */
  for (i = 0; elements[i]; ++i)
    {
      int t = elements[i]->type;
      if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
        {
          if (!is_hfa1 (elements[i], candidate))
            return 0;
        }
      else if (t != candidate)
        return 0;
    }

  /* All tests succeeded.  Encode the result.  */
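  /* For example, struct { double x, y; } reaches here with candidate
     == FFI_TYPE_DOUBLE (3) and ele_count == 2, yielding
     3 * 4 + (4 - 2) == 14 == AARCH64_RET_D2.  */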
 done:
  return candidate * 4 + (4 - (int)ele_count);
}

/* Representation of the procedure call argument marshalling
   state.

   The terse state variable names match the names used in the AARCH64
   PCS.  */

struct arg_state
{
  unsigned ngrn;  /* Next general-purpose register number.  */
  unsigned nsrn;  /* Next vector register number.  */
  size_t nsaa;    /* Next stack offset.  */

#if defined (__APPLE__)
  unsigned allocating_variadic;
#endif
};

/* Initialize a procedure call argument marshalling state.  */
static void
arg_init (struct arg_state *state)
{
  state->ngrn = 0;
  state->nsrn = 0;
  state->nsaa = 0;
#if defined (__APPLE__)
  state->allocating_variadic = 0;
#endif
}

/* Allocate an aligned slot on the stack and return a pointer to it.  */
static void *
allocate_to_stack (struct arg_state *state, void *stack,
                   size_t alignment, size_t size)
{
  size_t nsaa = state->nsaa;

  /* Round up the NSAA to the larger of 8 or the natural
     alignment of the argument's type.  */
#if defined (__APPLE__)
  if (state->allocating_variadic && alignment < 8)
    alignment = 8;
#else
  if (alignment < 8)
    alignment = 8;
#endif

  nsaa = FFI_ALIGN (nsaa, alignment);
  state->nsaa = nsaa + size;

  return (char *)stack + nsaa;
}

static ffi_arg
extend_integer_type (void *source, int type)
{
  switch (type)
    {
    case FFI_TYPE_UINT8:
      return *(UINT8 *) source;
    case FFI_TYPE_SINT8:
      return *(SINT8 *) source;
    case FFI_TYPE_UINT16:
      return *(UINT16 *) source;
    case FFI_TYPE_SINT16:
      return *(SINT16 *) source;
    case FFI_TYPE_UINT32:
      return *(UINT32 *) source;
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      return *(SINT32 *) source;
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      return *(UINT64 *) source;
    case FFI_TYPE_POINTER:
      return *(uintptr_t *) source;
    default:
      abort();
    }
}

#if defined(_MSC_VER)
void extend_hfa_type (void *dest, void *src, int h);
#else
static void
extend_hfa_type (void *dest, void *src, int h)
{
  ssize_t f = h - AARCH64_RET_S4;
  void *x0;

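  /* Dispatch on H via a computed branch: each case in the table below is
     exactly three 4-byte instructions (12 bytes), so F * 12 is the byte
     offset of the matching entry from label 0.  */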
  asm volatile (
    "adr %0, 0f\n"
    "   add %0, %0, %1\n"
    "   br  %0\n"
    "0: ldp s16, s17, [%3]\n"      /* S4 */
    "   ldp s18, s19, [%3, #8]\n"
    "   b   4f\n"
    "   ldp s16, s17, [%3]\n"      /* S3 */
    "   ldr s18, [%3, #8]\n"
    "   b   3f\n"
    "   ldp s16, s17, [%3]\n"      /* S2 */
    "   b   2f\n"
    "   nop\n"
    "   ldr s16, [%3]\n"           /* S1 */
    "   b   1f\n"
    "   nop\n"
    "   ldp d16, d17, [%3]\n"      /* D4 */
    "   ldp d18, d19, [%3, #16]\n"
    "   b   4f\n"
    "   ldp d16, d17, [%3]\n"      /* D3 */
    "   ldr d18, [%3, #16]\n"
    "   b   3f\n"
    "   ldp d16, d17, [%3]\n"      /* D2 */
    "   b   2f\n"
    "   nop\n"
    "   ldr d16, [%3]\n"           /* D1 */
    "   b   1f\n"
    "   nop\n"
    "   ldp q16, q17, [%3]\n"      /* Q4 */
    "   ldp q18, q19, [%3, #32]\n"
    "   b   4f\n"
    "   ldp q16, q17, [%3]\n"      /* Q3 */
    "   ldr q18, [%3, #32]\n"
    "   b   3f\n"
    "   ldp q16, q17, [%3]\n"      /* Q2 */
    "   b   2f\n"
    "   nop\n"
    "   ldr q16, [%3]\n"           /* Q1 */
    "   b   1f\n"
    "4: str q19, [%2, #48]\n"
    "3: str q18, [%2, #32]\n"
    "2: str q17, [%2, #16]\n"
    "1: str q16, [%2]"
    : "=&r"(x0)
    : "r"(f * 12), "r"(dest), "r"(src)
    : "memory", "v16", "v17", "v18", "v19");
}
#endif

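/* An HFA arrives in up to four consecutive vector registers, saved in the
   call context with 16-byte spacing; compress_hfa_type packs the elements
   back into a contiguous object.  The ST2/ST3/ST4 forms below store lane 0
   of each source register to consecutive memory locations.  */
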
#if defined(_MSC_VER)
void* compress_hfa_type (void *dest, void *src, int h);
#else
static void *
compress_hfa_type (void *dest, void *reg, int h)
{
  switch (h)
    {
    case AARCH64_RET_S1:
      if (dest == reg)
        {
#ifdef __AARCH64EB__
          dest += 12;
#endif
        }
      else
        *(float *)dest = *(float *)reg;
      break;
    case AARCH64_RET_S2:
      asm ("ldp q16, q17, [%1]\n\t"
           "st2 { v16.s, v17.s }[0], [%0]"
           : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
      break;
    case AARCH64_RET_S3:
      asm ("ldp q16, q17, [%1]\n\t"
           "ldr q18, [%1, #32]\n\t"
           "st3 { v16.s, v17.s, v18.s }[0], [%0]"
           : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
      break;
    case AARCH64_RET_S4:
      asm ("ldp q16, q17, [%1]\n\t"
           "ldp q18, q19, [%1, #32]\n\t"
           "st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]"
           : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
      break;

    case AARCH64_RET_D1:
      if (dest == reg)
        {
#ifdef __AARCH64EB__
          dest += 8;
#endif
        }
      else
        *(double *)dest = *(double *)reg;
      break;
    case AARCH64_RET_D2:
      asm ("ldp q16, q17, [%1]\n\t"
           "st2 { v16.d, v17.d }[0], [%0]"
           : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
      break;
    case AARCH64_RET_D3:
      asm ("ldp q16, q17, [%1]\n\t"
           "ldr q18, [%1, #32]\n\t"
           "st3 { v16.d, v17.d, v18.d }[0], [%0]"
           : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
      break;
    case AARCH64_RET_D4:
      asm ("ldp q16, q17, [%1]\n\t"
           "ldp q18, q19, [%1, #32]\n\t"
           "st4 { v16.d, v17.d, v18.d, v19.d }[0], [%0]"
           : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
      break;

    default:
      if (dest != reg)
        return memcpy (dest, reg, 16 * (4 - (h & 3)));
      break;
    }
  return dest;
}
#endif

/* Either allocate an appropriate register for the argument type, or if
   none are available, allocate a stack slot and return a pointer
   to the allocated space.  */

static void *
allocate_int_to_reg_or_stack (struct call_context *context,
                              struct arg_state *state,
                              void *stack, size_t size)
{
  if (state->ngrn < N_X_ARG_REG)
    return &context->x[state->ngrn++];

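  /* Out of integer registers: mark them all used, per the AAPCS64 rule
     that once an argument has gone to the stack no later argument may be
     back-filled into a register, then allocate a stack slot.  */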
  state->ngrn = N_X_ARG_REG;
  return allocate_to_stack (state, stack, size, size);
}

ffi_status FFI_HIDDEN
ffi_prep_cif_machdep (ffi_cif *cif)
{
  ffi_type *rtype = cif->rtype;
  size_t bytes = cif->bytes;
  int flags, i, n;

  switch (rtype->type)
    {
    case FFI_TYPE_VOID:
      flags = AARCH64_RET_VOID;
      break;
    case FFI_TYPE_UINT8:
      flags = AARCH64_RET_UINT8;
      break;
    case FFI_TYPE_UINT16:
      flags = AARCH64_RET_UINT16;
      break;
    case FFI_TYPE_UINT32:
      flags = AARCH64_RET_UINT32;
      break;
    case FFI_TYPE_SINT8:
      flags = AARCH64_RET_SINT8;
      break;
    case FFI_TYPE_SINT16:
      flags = AARCH64_RET_SINT16;
      break;
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      flags = AARCH64_RET_SINT32;
      break;
    case FFI_TYPE_SINT64:
    case FFI_TYPE_UINT64:
      flags = AARCH64_RET_INT64;
      break;
    case FFI_TYPE_POINTER:
      flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64);
      break;

    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
    case FFI_TYPE_LONGDOUBLE:
    case FFI_TYPE_STRUCT:
    case FFI_TYPE_COMPLEX:
      flags = is_vfp_type (rtype);
      if (flags == 0)
        {
          size_t s = rtype->size;
          if (s > 16)
            {
              flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM;
              bytes += 8;
            }
          else if (s == 16)
            flags = AARCH64_RET_INT128;
          else if (s == 8)
            flags = AARCH64_RET_INT64;
          else
            flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY;
        }
      break;

    default:
      abort();
    }

  for (i = 0, n = cif->nargs; i < n; i++)
    if (is_vfp_type (cif->arg_types[i]))
      {
        flags |= AARCH64_FLAG_ARG_V;
        break;
      }

  /* Round the stack up to a multiple of the stack alignment requirement.  */
  cif->bytes = (unsigned) FFI_ALIGN(bytes, 16);
  cif->flags = flags;
#if defined (__APPLE__)
  cif->aarch64_nfixedargs = 0;
#endif

  return FFI_OK;
}

#if defined (__APPLE__)
/* Perform Apple-specific cif processing for variadic calls.  */
ffi_status FFI_HIDDEN
ffi_prep_cif_machdep_var (ffi_cif *cif, unsigned int nfixedargs,
                          unsigned int ntotalargs)
{
  ffi_status status = ffi_prep_cif_machdep (cif);
  cif->aarch64_nfixedargs = nfixedargs;
  return status;
}
#endif /* __APPLE__ */

extern void ffi_call_SYSV (struct call_context *context, void *frame,
                           void (*fn)(void), void *rvalue, int flags,
                           void *closure) FFI_HIDDEN;

/* Call a function with the provided arguments and capture the return
   value.  */
static void
ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
              void **avalue, void *closure)
{
  struct call_context *context;
  void *stack, *frame, *rvalue;
  struct arg_state state;
  size_t stack_bytes, rtype_size, rsize;
  int i, nargs, flags;
  ffi_type *rtype;

  flags = cif->flags;
  rtype = cif->rtype;
  rtype_size = rtype->size;
  stack_bytes = cif->bytes;

  /* If the target function returns a structure via hidden pointer,
     then we cannot allow a null rvalue.  Otherwise, mash a null
     rvalue to void return type.  */
  rsize = 0;
  if (flags & AARCH64_RET_IN_MEM)
    {
      if (orig_rvalue == NULL)
        rsize = rtype_size;
    }
  else if (orig_rvalue == NULL)
    flags &= AARCH64_FLAG_ARG_V;
  else if (flags & AARCH64_RET_NEED_COPY)
    rsize = 16;

  /* Allocate consecutive stack for everything we'll need.  */
  context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize);
  stack = context + 1;
  frame = (void*)((uintptr_t)stack + (uintptr_t)stack_bytes);
  rvalue = (rsize ? (void*)((uintptr_t)frame + 32) : orig_rvalue);
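
  /* Layout of the alloca'd block, from low to high addresses: the
     call_context, CIF->BYTES of outgoing stack arguments, the 32-byte
     frame used by ffi_call_SYSV, then (if RSIZE is nonzero) a scratch
     return-value buffer.  */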

  arg_init (&state);
  for (i = 0, nargs = cif->nargs; i < nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];
      size_t s = ty->size;
      void *a = avalue[i];
      int h, t;

      t = ty->type;
      switch (t)
        {
        case FFI_TYPE_VOID:
          FFI_ASSERT (0);
          break;

        /* If the argument is a basic type the argument is allocated to an
           appropriate register, or if none are available, to the stack.  */
        case FFI_TYPE_INT:
        case FFI_TYPE_UINT8:
        case FFI_TYPE_SINT8:
        case FFI_TYPE_UINT16:
        case FFI_TYPE_SINT16:
        case FFI_TYPE_UINT32:
        case FFI_TYPE_SINT32:
        case FFI_TYPE_UINT64:
        case FFI_TYPE_SINT64:
        case FFI_TYPE_POINTER:
        do_pointer:
          {
            ffi_arg ext = extend_integer_type (a, t);
            if (state.ngrn < N_X_ARG_REG)
              context->x[state.ngrn++] = ext;
            else
              {
                void *d = allocate_to_stack (&state, stack, ty->alignment, s);
                state.ngrn = N_X_ARG_REG;
                /* Note that the default abi extends each argument
                   to a full 64-bit slot, while the iOS abi allocates
                   only enough space.  */
#ifdef __APPLE__
                memcpy (d, a, s);
#else
                *(ffi_arg *)d = ext;
#endif
              }
          }
          break;

        case FFI_TYPE_FLOAT:
        case FFI_TYPE_DOUBLE:
        case FFI_TYPE_LONGDOUBLE:
        case FFI_TYPE_STRUCT:
        case FFI_TYPE_COMPLEX:
          {
            void *dest;

            h = is_vfp_type (ty);
            if (h)
              {
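                /* The low two bits of H encode 4 - COUNT (see is_vfp_type),
                   so recover the number of registers this HFA occupies.  */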
                int elems = 4 - (h & 3);
#ifdef _M_ARM64 /* for handling armasm calling convention */
                if (cif->is_variadic)
                  {
                    if (state.ngrn + elems <= N_X_ARG_REG)
                      {
                        dest = &context->x[state.ngrn];
                        state.ngrn += elems;
                        extend_hfa_type (dest, a, h);
                        break;
                      }
                    state.nsrn = N_X_ARG_REG;
                    dest = allocate_to_stack (&state, stack, ty->alignment, s);
                  }
                else
                  {
#endif /* for handling armasm calling convention */
                if (state.nsrn + elems <= N_V_ARG_REG)
                  {
                    dest = &context->v[state.nsrn];
                    state.nsrn += elems;
                    extend_hfa_type (dest, a, h);
                    break;
                  }
                state.nsrn = N_V_ARG_REG;
                dest = allocate_to_stack (&state, stack, ty->alignment, s);
#ifdef _M_ARM64 /* for handling armasm calling convention */
                  }
#endif /* for handling armasm calling convention */
              }
            else if (s > 16)
              {
                /* If the argument is a composite type that is larger than 16
                   bytes, then the argument has been copied to memory, and
                   the argument is replaced by a pointer to the copy.  */
                a = &avalue[i];
                t = FFI_TYPE_POINTER;
                s = sizeof (void *);
                goto do_pointer;
              }
            else
              {
                size_t n = (s + 7) / 8;
                if (state.ngrn + n <= N_X_ARG_REG)
                  {
                    /* If the argument is a composite type and the size in
                       double-words is not more than the number of available
                       X registers, then the argument is copied into
                       consecutive X registers.  */
                    dest = &context->x[state.ngrn];
                    state.ngrn += (unsigned int)n;
                  }
                else
                  {
                    /* Otherwise, there are insufficient X registers.  Further
                       X register allocations are prevented, the NSAA is
                       adjusted and the argument is copied to memory at the
                       adjusted NSAA.  */
                    state.ngrn = N_X_ARG_REG;
                    dest = allocate_to_stack (&state, stack, ty->alignment, s);
                  }
              }
            memcpy (dest, a, s);
          }
          break;

        default:
          abort();
        }

#if defined (__APPLE__)
      if (i + 1 == cif->aarch64_nfixedargs)
        {
          state.ngrn = N_X_ARG_REG;
          state.nsrn = N_V_ARG_REG;
          state.allocating_variadic = 1;
        }
#endif
    }

  ffi_call_SYSV (context, frame, fn, rvalue, flags, closure);

  if (flags & AARCH64_RET_NEED_COPY)
    memcpy (orig_rvalue, rvalue, rtype_size);
}

void
ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue)
{
  ffi_call_int (cif, fn, rvalue, avalue, NULL);
}

#ifdef FFI_GO_CLOSURES
void
ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
             void **avalue, void *closure)
{
  ffi_call_int (cif, fn, rvalue, avalue, closure);
}
#endif /* FFI_GO_CLOSURES */

/* Build a trampoline.  */

extern void ffi_closure_SYSV (void) FFI_HIDDEN;
extern void ffi_closure_SYSV_V (void) FFI_HIDDEN;

ffi_status
ffi_prep_closure_loc (ffi_closure *closure,
                      ffi_cif* cif,
                      void (*fun)(ffi_cif*,void*,void**,void*),
                      void *user_data,
                      void *codeloc)
{
  if (cif->abi != FFI_SYSV)
    return FFI_BAD_ABI;

  void (*start)(void);

  if (cif->flags & AARCH64_FLAG_ARG_V)
    start = ffi_closure_SYSV_V;
  else
    start = ffi_closure_SYSV;

#if FFI_EXEC_TRAMPOLINE_TABLE
#ifdef __MACH__
  void **config = (void **)((uint8_t *)codeloc - PAGE_MAX_SIZE);
  config[0] = closure;
  config[1] = start;
#endif
#else
  static const unsigned char trampoline[16] = {
    0x90, 0x00, 0x00, 0x58,  /* ldr  x16, tramp+16  */
    0xf1, 0xff, 0xff, 0x10,  /* adr  x17, tramp+0   */
    0x00, 0x02, 0x1f, 0xd6   /* br   x16            */
  };
  char *tramp = closure->tramp;

  memcpy (tramp, trampoline, sizeof(trampoline));

  *(UINT64 *)(tramp + 16) = (uintptr_t)start;
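
  /* The code above is 16 bytes; the 64-bit address of START sits
     immediately after it, where the PC-relative LDR picks it up.  ADR
     leaves the trampoline address in x17 so that ffi_closure_SYSV can
     locate this closure's data.  */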

  ffi_clear_cache (tramp, tramp + FFI_TRAMPOLINE_SIZE);

  /* Also flush the cache for code mapping.  */
#ifdef _M_ARM64
  /* Not using dlmalloc.c for Windows ARM64 builds, so calling
     ffi_data_to_code_pointer() isn't necessary.  */
  unsigned char *tramp_code = (unsigned char *)tramp;
#else
  unsigned char *tramp_code = ffi_data_to_code_pointer (tramp);
#endif
  ffi_clear_cache (tramp_code, tramp_code + FFI_TRAMPOLINE_SIZE);
#endif

  closure->cif = cif;
  closure->fun = fun;
  closure->user_data = user_data;

  return FFI_OK;
}
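
/* A minimal usage sketch (hypothetical callback; error handling elided):

     static void
     add_one (ffi_cif *cif, void *ret, void **args, void *user_data)
     {
       *(int *)ret = *(int *)args[0] + 1;
     }

     void *code;
     ffi_closure *cl = ffi_closure_alloc (sizeof (ffi_closure), &code);
     ffi_cif cif;
     ffi_type *args[] = { &ffi_type_sint };
     ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 1, &ffi_type_sint, args);
     ffi_prep_closure_loc (cl, &cif, add_one, NULL, code);
     int (*fn) (int) = (int (*)(int)) code;

   after which fn (41) returns 42.  */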

#ifdef FFI_GO_CLOSURES
extern void ffi_go_closure_SYSV (void) FFI_HIDDEN;
extern void ffi_go_closure_SYSV_V (void) FFI_HIDDEN;

ffi_status
ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif* cif,
                     void (*fun)(ffi_cif*,void*,void**,void*))
{
  void (*start)(void);

  if (cif->abi != FFI_SYSV)
    return FFI_BAD_ABI;

  if (cif->flags & AARCH64_FLAG_ARG_V)
    start = ffi_go_closure_SYSV_V;
  else
    start = ffi_go_closure_SYSV;

  closure->tramp = start;
  closure->cif = cif;
  closure->fun = fun;

  return FFI_OK;
}
#endif /* FFI_GO_CLOSURES */

/* Primary handler to setup and invoke a function within a closure.

   A closure when invoked enters via the assembler wrapper
   ffi_closure_SYSV().  The wrapper allocates a call context on the
   stack, saves the interesting registers (from the perspective of
   the calling convention) into the context, then passes control to
   ffi_closure_SYSV_inner() passing the saved context and a pointer to
   the stack at the point ffi_closure_SYSV() was invoked.

   On the return path the assembler wrapper will reload call context
   registers.

   ffi_closure_SYSV_inner() marshals the call context into ffi value
   descriptors, invokes the wrapped function, then marshals the return
   value back into the call context.  */

int FFI_HIDDEN
ffi_closure_SYSV_inner (ffi_cif *cif,
                        void (*fun)(ffi_cif*,void*,void**,void*),
                        void *user_data,
                        struct call_context *context,
                        void *stack, void *rvalue, void *struct_rvalue)
{
  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
  int i, h, nargs, flags;
  struct arg_state state;

  arg_init (&state);

  for (i = 0, nargs = cif->nargs; i < nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];
      int t = ty->type;
      size_t n, s = ty->size;

      switch (t)
        {
        case FFI_TYPE_VOID:
          FFI_ASSERT (0);
          break;

        case FFI_TYPE_INT:
        case FFI_TYPE_UINT8:
        case FFI_TYPE_SINT8:
        case FFI_TYPE_UINT16:
        case FFI_TYPE_SINT16:
        case FFI_TYPE_UINT32:
        case FFI_TYPE_SINT32:
        case FFI_TYPE_UINT64:
        case FFI_TYPE_SINT64:
        case FFI_TYPE_POINTER:
          avalue[i] = allocate_int_to_reg_or_stack (context, &state, stack, s);
          break;

        case FFI_TYPE_FLOAT:
        case FFI_TYPE_DOUBLE:
        case FFI_TYPE_LONGDOUBLE:
        case FFI_TYPE_STRUCT:
        case FFI_TYPE_COMPLEX:
          h = is_vfp_type (ty);
          if (h)
            {
              n = 4 - (h & 3);
#ifdef _M_ARM64 /* for handling armasm calling convention */
              if (cif->is_variadic)
                {
                  if (state.ngrn + n <= N_X_ARG_REG)
                    {
                      void *reg = &context->x[state.ngrn];
                      state.ngrn += (unsigned int)n;

                      /* Eeek! We need a pointer to the structure, however the
                         homogeneous float elements are being passed in
                         individual registers, therefore for float and double
                         the structure is not represented as a contiguous
                         sequence of bytes in our saved register context.  We
                         don't need the original contents of the register
                         storage, so we reformat the structure into the same
                         memory.  */
                      avalue[i] = compress_hfa_type (reg, reg, h);
                    }
                  else
                    {
                      state.ngrn = N_X_ARG_REG;
                      state.nsrn = N_V_ARG_REG;
                      avalue[i] = allocate_to_stack (&state, stack,
                                                     ty->alignment, s);
                    }
                }
              else
                {
#endif /* for handling armasm calling convention */
              if (state.nsrn + n <= N_V_ARG_REG)
                {
                  void *reg = &context->v[state.nsrn];
                  state.nsrn += (unsigned int)n;
                  avalue[i] = compress_hfa_type (reg, reg, h);
                }
              else
                {
                  state.nsrn = N_V_ARG_REG;
                  avalue[i] = allocate_to_stack (&state, stack,
                                                 ty->alignment, s);
                }
#ifdef _M_ARM64 /* for handling armasm calling convention */
                }
#endif /* for handling armasm calling convention */
            }
          else if (s > 16)
            {
              /* Replace composite type of size greater than 16 with a
                 pointer.  */
              avalue[i] = *(void **)
                allocate_int_to_reg_or_stack (context, &state, stack,
                                              sizeof (void *));
            }
          else
            {
              n = (s + 7) / 8;
              if (state.ngrn + n <= N_X_ARG_REG)
                {
                  avalue[i] = &context->x[state.ngrn];
                  state.ngrn += (unsigned int)n;
                }
              else
                {
                  state.ngrn = N_X_ARG_REG;
                  avalue[i] = allocate_to_stack (&state, stack,
                                                 ty->alignment, s);
                }
            }
          break;

        default:
          abort();
        }

#if defined (__APPLE__)
      if (i + 1 == cif->aarch64_nfixedargs)
        {
          state.ngrn = N_X_ARG_REG;
          state.nsrn = N_V_ARG_REG;
          state.allocating_variadic = 1;
        }
#endif
    }

  flags = cif->flags;
  if (flags & AARCH64_RET_IN_MEM)
    rvalue = struct_rvalue;

  fun (cif, rvalue, avalue, user_data);

  return flags;
}

#endif /* defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) */