/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
``Software''), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */

#if defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fficonfig.h>
#include <ffi.h>
#include <ffi_common.h>
#include "internal.h"
#ifdef _M_ARM64
#include <windows.h> /* FlushInstructionCache */
#endif

/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
   all further uses in this file will refer to the 128-bit type.  */
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
# if FFI_TYPE_LONGDOUBLE != 4
#  error FFI_TYPE_LONGDOUBLE out of date
# endif
#else
# undef FFI_TYPE_LONGDOUBLE
# define FFI_TYPE_LONGDOUBLE 4
#endif

union _d
{
  UINT64 d;
  UINT32 s[2];
};

struct _v
{
  union _d d[2] __attribute__((aligned(16)));
};

struct call_context
{
  struct _v v[N_V_ARG_REG];
  UINT64 x[N_X_ARG_REG];
};
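
/* Each struct _v models one 128-bit V register.  The assembly entry
   points save the vector argument registers first and the integer
   argument registers after them, so this structure mirrors the machine
   state at the call boundary; per the AAPCS64 there are eight of each
   (v0-v7 and x0-x7, i.e. N_V_ARG_REG == N_X_ARG_REG == 8).  */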

#if FFI_EXEC_TRAMPOLINE_TABLE

#ifdef __MACH__
#include <mach/vm_param.h>
#endif

#else

#if defined (__clang__) && defined (__APPLE__)
extern void sys_icache_invalidate (void *start, size_t len);
#endif

static inline void
ffi_clear_cache (void *start, void *end)
{
#if defined (__clang__) && defined (__APPLE__)
  sys_icache_invalidate (start, (char *)end - (char *)start);
#elif defined (__GNUC__)
  __builtin___clear_cache (start, end);
#elif defined (_M_ARM64)
  FlushInstructionCache(GetCurrentProcess(), start, (char*)end - (char*)start);
#else
#error "Missing builtin to flush instruction cache"
#endif
}

#endif

/* A subroutine of is_vfp_type.  Given a structure type, return the type code
   of the first non-structure element.  Recurse for structure elements.
   Return -1 if the structure is in fact empty, i.e. no nested elements.  */

static int
is_hfa0 (const ffi_type *ty)
{
  ffi_type **elements = ty->elements;
  int i, ret = -1;

  if (elements != NULL)
    for (i = 0; elements[i]; ++i)
      {
        ret = elements[i]->type;
        if (ret == FFI_TYPE_STRUCT || ret == FFI_TYPE_COMPLEX)
          {
            ret = is_hfa0 (elements[i]);
            if (ret < 0)
              continue;
          }
        break;
      }

  return ret;
}

/* A subroutine of is_vfp_type.  Given a structure type, return true if all
   of the non-structure elements are the same as CANDIDATE.  */

static int
is_hfa1 (const ffi_type *ty, int candidate)
{
  ffi_type **elements = ty->elements;
  int i;

  if (elements != NULL)
    for (i = 0; elements[i]; ++i)
      {
        int t = elements[i]->type;
        if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
          {
            if (!is_hfa1 (elements[i], candidate))
              return 0;
          }
        else if (t != candidate)
          return 0;
      }

  return 1;
}

/* Determine if TY may be allocated to the FP registers.  This is both an
   fp scalar type as well as a homogeneous floating point aggregate (HFA).
   That is, a structure consisting of 1 to 4 members of all the same type,
   where that type is an fp scalar.

   Returns non-zero iff TY is an HFA.  The result is the AARCH64_RET_*
   constant for the type.  */

static int
is_vfp_type (const ffi_type *ty)
{
  ffi_type **elements;
  int candidate, i;
  size_t size, ele_count;

  /* Quickest tests first.  */
  candidate = ty->type;
  switch (candidate)
    {
    default:
      return 0;
    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
    case FFI_TYPE_LONGDOUBLE:
      ele_count = 1;
      goto done;
    case FFI_TYPE_COMPLEX:
      candidate = ty->elements[0]->type;
      switch (candidate)
	{
	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
	case FFI_TYPE_LONGDOUBLE:
	  ele_count = 2;
	  goto done;
	}
      return 0;
    case FFI_TYPE_STRUCT:
      break;
    }

  /* No HFA types are smaller than 4 bytes, or larger than 64 bytes.  */
  size = ty->size;
  if (size < 4 || size > 64)
    return 0;

  /* Find the type of the first non-structure member.  */
  elements = ty->elements;
  candidate = elements[0]->type;
  if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX)
    {
      for (i = 0; ; ++i)
        {
          candidate = is_hfa0 (elements[i]);
          if (candidate >= 0)
            break;
        }
    }

  /* If the first member is not a floating point type, it's not an HFA.
     Also quickly re-check the size of the structure.  */
  switch (candidate)
    {
    case FFI_TYPE_FLOAT:
      ele_count = size / sizeof(float);
      if (size != ele_count * sizeof(float))
        return 0;
      break;
    case FFI_TYPE_DOUBLE:
      ele_count = size / sizeof(double);
      if (size != ele_count * sizeof(double))
        return 0;
      break;
    case FFI_TYPE_LONGDOUBLE:
      ele_count = size / sizeof(long double);
      if (size != ele_count * sizeof(long double))
        return 0;
      break;
    default:
      return 0;
    }
  if (ele_count > 4)
    return 0;

  /* Finally, make sure that all scalar elements are the same type.  */
  for (i = 0; elements[i]; ++i)
    {
      int t = elements[i]->type;
      if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
        {
          if (!is_hfa1 (elements[i], candidate))
            return 0;
        }
      else if (t != candidate)
        return 0;
    }

  /* All tests succeeded.  Encode the result.  */
 done:
  return candidate * 4 + (4 - (int)ele_count);
}
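
/* Illustration of the encoding above (the AARCH64_RET_* values in
   internal.h are laid out to match this formula): a struct of two
   floats has candidate == FFI_TYPE_FLOAT (2) and ele_count == 2,
   giving 2 * 4 + (4 - 2) == 10 == AARCH64_RET_S2, while a single
   double gives 3 * 4 + (4 - 1) == 15 == AARCH64_RET_D1.  */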

/* Representation of the procedure call argument marshalling
   state.

   The terse state variable names match the names used in the AARCH64
   PCS. */

struct arg_state
{
  unsigned ngrn;                /* Next general-purpose register number. */
  unsigned nsrn;                /* Next vector register number. */
  size_t nsaa;                  /* Next stack offset. */

#if defined (__APPLE__)
  unsigned allocating_variadic;
#endif
};

/* Initialize a procedure call argument marshalling state.  */
static void
arg_init (struct arg_state *state)
{
  state->ngrn = 0;
  state->nsrn = 0;
  state->nsaa = 0;
#if defined (__APPLE__)
  state->allocating_variadic = 0;
#endif
}

/* Allocate an aligned slot on the stack and return a pointer to it.  */
static void *
allocate_to_stack (struct arg_state *state, void *stack,
		   size_t alignment, size_t size)
{
  size_t nsaa = state->nsaa;

  /* Round up the NSAA to the larger of 8 or the natural
     alignment of the argument's type.  */
#if defined (__APPLE__)
  if (state->allocating_variadic && alignment < 8)
    alignment = 8;
#else
  if (alignment < 8)
    alignment = 8;
#endif

  nsaa = FFI_ALIGN (nsaa, alignment);
  state->nsaa = nsaa + size;

  return (char *)stack + nsaa;
}
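
/* For example, with nsaa == 4 a uint8_t argument (alignment 1) lands at
   offset 8 under the standard AAPCS64 rounding above, whereas Apple's
   ABI packs non-variadic stack arguments at their natural alignment,
   so the same argument would land at offset 4.  */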

static ffi_arg
extend_integer_type (void *source, int type)
{
  switch (type)
    {
    case FFI_TYPE_UINT8:
      return *(UINT8 *) source;
    case FFI_TYPE_SINT8:
      return *(SINT8 *) source;
    case FFI_TYPE_UINT16:
      return *(UINT16 *) source;
    case FFI_TYPE_SINT16:
      return *(SINT16 *) source;
    case FFI_TYPE_UINT32:
      return *(UINT32 *) source;
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      return *(SINT32 *) source;
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      return *(UINT64 *) source;
    case FFI_TYPE_POINTER:
      return *(uintptr_t *) source;
    default:
      abort();
    }
}

#if defined(_MSC_VER)
void extend_hfa_type (void *dest, void *src, int h);
#else
static void
extend_hfa_type (void *dest, void *src, int h)
{
  ssize_t f = h - AARCH64_RET_S4;
  void *x0;
  asm volatile (
	"adr	%0, 0f\n"
"	add	%0, %0, %1\n"
"	br	%0\n"
"0:	ldp	s16, s17, [%3]\n"	/* S4 */
"	ldp	s18, s19, [%3, #8]\n"
"	b	4f\n"
"	ldp	s16, s17, [%3]\n"	/* S3 */
"	ldr	s18, [%3, #8]\n"
"	b	3f\n"
"	ldp	s16, s17, [%3]\n"	/* S2 */
"	b	2f\n"
"	nop\n"
"	ldr	s16, [%3]\n"		/* S1 */
"	b	1f\n"
"	nop\n"
"	ldp	d16, d17, [%3]\n"	/* D4 */
"	ldp	d18, d19, [%3, #16]\n"
"	b	4f\n"
"	ldp	d16, d17, [%3]\n"	/* D3 */
"	ldr	d18, [%3, #16]\n"
"	b	3f\n"
"	ldp	d16, d17, [%3]\n"	/* D2 */
"	b	2f\n"
"	nop\n"
"	ldr	d16, [%3]\n"		/* D1 */
"	b	1f\n"
"	nop\n"
"	ldp	q16, q17, [%3]\n"	/* Q4 */
"	ldp	q18, q19, [%3, #32]\n"
"	b	4f\n"
"	ldp	q16, q17, [%3]\n"	/* Q3 */
"	ldr	q18, [%3, #32]\n"
"	b	3f\n"
"	ldp	q16, q17, [%3]\n"	/* Q2 */
"	b	2f\n"
"	nop\n"
"	ldr	q16, [%3]\n"		/* Q1 */
"	b	1f\n"
"4:	str	q19, [%2, #48]\n"
"3:	str	q18, [%2, #32]\n"
"2:	str	q17, [%2, #16]\n"
"1:	str	q16, [%2]"
    : "=&r"(x0)
    : "r"(f * 12), "r"(dest), "r"(src)
    : "memory", "v16", "v17", "v18", "v19");
}
#endif

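/* The inverse of extend_hfa_type: gather the elements of an HFA that
   arrived in successive vector registers (one element per 16-byte
   register slot in the saved context) into a contiguous object at
   DEST.  The stN single-structure lane stores below write lane 0 of
   each source register consecutively, packing the elements back
   together.  */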
#if defined(_MSC_VER)
void* compress_hfa_type (void *dest, void *src, int h);
#else
static void *
compress_hfa_type (void *dest, void *reg, int h)
{
  switch (h)
    {
    case AARCH64_RET_S1:
      if (dest == reg)
	{
#ifdef __AARCH64EB__
	  dest += 12;
#endif
	}
      else
	*(float *)dest = *(float *)reg;
      break;
    case AARCH64_RET_S2:
      asm ("ldp q16, q17, [%1]\n\t"
	   "st2 { v16.s, v17.s }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
      break;
    case AARCH64_RET_S3:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldr q18, [%1, #32]\n\t"
	   "st3 { v16.s, v17.s, v18.s }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
      break;
    case AARCH64_RET_S4:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldp q18, q19, [%1, #32]\n\t"
	   "st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
      break;

    case AARCH64_RET_D1:
      if (dest == reg)
	{
#ifdef __AARCH64EB__
	  dest += 8;
#endif
	}
      else
	*(double *)dest = *(double *)reg;
      break;
    case AARCH64_RET_D2:
      asm ("ldp q16, q17, [%1]\n\t"
	   "st2 { v16.d, v17.d }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
      break;
    case AARCH64_RET_D3:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldr q18, [%1, #32]\n\t"
	   "st3 { v16.d, v17.d, v18.d }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
      break;
    case AARCH64_RET_D4:
      asm ("ldp q16, q17, [%1]\n\t"
	   "ldp q18, q19, [%1, #32]\n\t"
	   "st4 { v16.d, v17.d, v18.d, v19.d }[0], [%0]"
	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
      break;

    default:
      if (dest != reg)
	return memcpy (dest, reg, 16 * (4 - (h & 3)));
      break;
    }
  return dest;
}
#endif

/* Either allocate an appropriate register for the argument type, or if
   none are available, allocate a stack slot and return a pointer
   to the allocated space.  */

static void *
allocate_int_to_reg_or_stack (struct call_context *context,
			      struct arg_state *state,
			      void *stack, size_t size)
{
  if (state->ngrn < N_X_ARG_REG)
    return &context->x[state->ngrn++];

  state->ngrn = N_X_ARG_REG;
  return allocate_to_stack (state, stack, size, size);
}

ffi_status FFI_HIDDEN
ffi_prep_cif_machdep (ffi_cif *cif)
{
  ffi_type *rtype = cif->rtype;
  size_t bytes = cif->bytes;
  int flags, i, n;

  switch (rtype->type)
    {
    case FFI_TYPE_VOID:
      flags = AARCH64_RET_VOID;
      break;
    case FFI_TYPE_UINT8:
      flags = AARCH64_RET_UINT8;
      break;
    case FFI_TYPE_UINT16:
      flags = AARCH64_RET_UINT16;
      break;
    case FFI_TYPE_UINT32:
      flags = AARCH64_RET_UINT32;
      break;
    case FFI_TYPE_SINT8:
      flags = AARCH64_RET_SINT8;
      break;
    case FFI_TYPE_SINT16:
      flags = AARCH64_RET_SINT16;
      break;
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      flags = AARCH64_RET_SINT32;
      break;
    case FFI_TYPE_SINT64:
    case FFI_TYPE_UINT64:
      flags = AARCH64_RET_INT64;
      break;
    case FFI_TYPE_POINTER:
      flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64);
      break;

    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
    case FFI_TYPE_LONGDOUBLE:
    case FFI_TYPE_STRUCT:
    case FFI_TYPE_COMPLEX:
      flags = is_vfp_type (rtype);
      if (flags == 0)
	{
	  size_t s = rtype->size;
	  if (s > 16)
	    {
	      flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM;
	      bytes += 8;
	    }
	  else if (s == 16)
	    flags = AARCH64_RET_INT128;
	  else if (s == 8)
	    flags = AARCH64_RET_INT64;
	  else
	    flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY;
	}
      break;

    default:
      abort();
    }

  for (i = 0, n = cif->nargs; i < n; i++)
    if (is_vfp_type (cif->arg_types[i]))
      {
	flags |= AARCH64_FLAG_ARG_V;
	break;
      }

  /* Round the stack up to a multiple of the stack alignment requirement. */
  cif->bytes = (unsigned) FFI_ALIGN(bytes, 16);
  cif->flags = flags;
#if defined (__APPLE__)
  cif->aarch64_nfixedargs = 0;
#endif

  return FFI_OK;
}
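
/* Illustration of the return-type encoding above: a 24-byte struct
   that is not an HFA is returned in memory (AARCH64_RET_VOID |
   AARCH64_RET_IN_MEM; the AAPCS64 passes the hidden result pointer
   in x8), while a 12-byte non-HFA struct is returned in x0/x1
   (AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY) and copied out to the
   caller's smaller buffer afterwards.  */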

#if defined (__APPLE__)
/* Perform Apple-specific cif processing for variadic calls.  */
ffi_status FFI_HIDDEN
ffi_prep_cif_machdep_var(ffi_cif *cif, unsigned int nfixedargs,
			 unsigned int ntotalargs)
{
  ffi_status status = ffi_prep_cif_machdep (cif);
  cif->aarch64_nfixedargs = nfixedargs;
  return status;
}
#endif /* __APPLE__ */

extern void ffi_call_SYSV (struct call_context *context, void *frame,
			   void (*fn)(void), void *rvalue, int flags,
			   void *closure) FFI_HIDDEN;

/* Call a function with the provided arguments and capture the return
   value.  */
static void
ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
	      void **avalue, void *closure)
{
  struct call_context *context;
  void *stack, *frame, *rvalue;
  struct arg_state state;
  size_t stack_bytes, rtype_size, rsize;
  int i, nargs, flags;
  ffi_type *rtype;

  flags = cif->flags;
  rtype = cif->rtype;
  rtype_size = rtype->size;
  stack_bytes = cif->bytes;

  /* If the target function returns a structure via hidden pointer,
     then we cannot allow a null rvalue.  Otherwise, mash a null
     rvalue to void return type.  */
  rsize = 0;
  if (flags & AARCH64_RET_IN_MEM)
    {
      if (orig_rvalue == NULL)
	rsize = rtype_size;
    }
  else if (orig_rvalue == NULL)
    flags &= AARCH64_FLAG_ARG_V;
  else if (flags & AARCH64_RET_NEED_COPY)
    rsize = 16;

  /* Allocate consecutive stack for everything we'll need.  */
  context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize);
  stack = context + 1;
  frame = (void*)((uintptr_t)stack + (uintptr_t)stack_bytes);
  rvalue = (rsize ? (void*)((uintptr_t)frame + 32) : orig_rvalue);
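
  /* The resulting layout, low addresses first:
       context   sizeof(struct call_context)  register save area
       stack     cif->bytes                   outgoing stack arguments
       frame     32                           scratch frame for ffi_call_SYSV
       rvalue    rsize                        local return buffer, if needed  */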

  arg_init (&state);
  for (i = 0, nargs = cif->nargs; i < nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];
      size_t s = ty->size;
      void *a = avalue[i];
      int h, t;

      t = ty->type;
      switch (t)
	{
	case FFI_TYPE_VOID:
	  FFI_ASSERT (0);
	  break;

	/* If the argument is a basic type the argument is allocated to an
	   appropriate register, or if none are available, to the stack.  */
	case FFI_TYPE_INT:
	case FFI_TYPE_UINT8:
	case FFI_TYPE_SINT8:
	case FFI_TYPE_UINT16:
	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	case FFI_TYPE_POINTER:
	do_pointer:
	  {
	    ffi_arg ext = extend_integer_type (a, t);
	    if (state.ngrn < N_X_ARG_REG)
	      context->x[state.ngrn++] = ext;
	    else
	      {
		void *d = allocate_to_stack (&state, stack, ty->alignment, s);
		state.ngrn = N_X_ARG_REG;
		/* Note that the default abi extends each argument
		   to a full 64-bit slot, while the iOS abi allocates
		   only enough space. */
#ifdef __APPLE__
		memcpy(d, a, s);
#else
		*(ffi_arg *)d = ext;
#endif
	      }
	  }
	  break;

	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
	case FFI_TYPE_LONGDOUBLE:
	case FFI_TYPE_STRUCT:
	case FFI_TYPE_COMPLEX:
	  {
	    void *dest;

	    h = is_vfp_type (ty);
	    if (h)
	      {
		int elems = 4 - (h & 3);
#ifdef _M_ARM64 /* for handling armasm calling convention */
		if (cif->is_variadic)
		  {
		    /* Variadic HFA arguments go in the integer registers
		       under the Windows ARM64 convention.  */
		    if (state.ngrn + elems <= N_X_ARG_REG)
		      {
			dest = &context->x[state.ngrn];
			state.ngrn += elems;
			extend_hfa_type (dest, a, h);
			break;
		      }
		    state.ngrn = N_X_ARG_REG;
		    dest = allocate_to_stack (&state, stack, ty->alignment, s);
		  }
		else
		  {
#endif /* for handling armasm calling convention */
		if (state.nsrn + elems <= N_V_ARG_REG)
		  {
		    dest = &context->v[state.nsrn];
		    state.nsrn += elems;
		    extend_hfa_type (dest, a, h);
		    break;
		  }
		state.nsrn = N_V_ARG_REG;
		dest = allocate_to_stack (&state, stack, ty->alignment, s);
#ifdef _M_ARM64 /* for handling armasm calling convention */
		  }
#endif /* for handling armasm calling convention */
	      }
	    else if (s > 16)
	      {
		/* If the argument is a composite type that is larger than 16
		   bytes, then the argument has been copied to memory, and
		   the argument is replaced by a pointer to the copy.  */
		a = &avalue[i];
		t = FFI_TYPE_POINTER;
		s = sizeof (void *);
		goto do_pointer;
	      }
	    else
	      {
		size_t n = (s + 7) / 8;
		if (state.ngrn + n <= N_X_ARG_REG)
		  {
		    /* If the argument is a composite type and the size in
		       double-words is not more than the number of available
		       X registers, then the argument is copied into
		       consecutive X registers.  */
		    dest = &context->x[state.ngrn];
		    state.ngrn += (unsigned int)n;
		  }
		else
		  {
		    /* Otherwise, there are insufficient X registers. Further
		       X register allocations are prevented, the NSAA is
		       adjusted and the argument is copied to memory at the
		       adjusted NSAA.  */
		    state.ngrn = N_X_ARG_REG;
		    dest = allocate_to_stack (&state, stack, ty->alignment, s);
		  }
	      }
	    memcpy (dest, a, s);
	  }
	  break;

	default:
	  abort();
	}

#if defined (__APPLE__)
      if (i + 1 == cif->aarch64_nfixedargs)
	{
	  state.ngrn = N_X_ARG_REG;
	  state.nsrn = N_V_ARG_REG;
	  state.allocating_variadic = 1;
	}
#endif
    }

  ffi_call_SYSV (context, frame, fn, rvalue, flags, closure);

  if (flags & AARCH64_RET_NEED_COPY)
    memcpy (orig_rvalue, rvalue, rtype_size);
}

void
ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue)
{
  ffi_call_int (cif, fn, rvalue, avalue, NULL);
}

#ifdef FFI_GO_CLOSURES
void
ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
	     void **avalue, void *closure)
{
  ffi_call_int (cif, fn, rvalue, avalue, closure);
}
#endif /* FFI_GO_CLOSURES */

/* Build a trampoline.  */

extern void ffi_closure_SYSV (void) FFI_HIDDEN;
extern void ffi_closure_SYSV_V (void) FFI_HIDDEN;

ffi_status
ffi_prep_closure_loc (ffi_closure *closure,
                      ffi_cif* cif,
                      void (*fun)(ffi_cif*,void*,void**,void*),
                      void *user_data,
                      void *codeloc)
{
  if (cif->abi != FFI_SYSV)
    return FFI_BAD_ABI;

  void (*start)(void);

  if (cif->flags & AARCH64_FLAG_ARG_V)
    start = ffi_closure_SYSV_V;
  else
    start = ffi_closure_SYSV;

#if FFI_EXEC_TRAMPOLINE_TABLE
#ifdef __MACH__
  void **config = (void **)((uint8_t *)codeloc - PAGE_MAX_SIZE);
  config[0] = closure;
  config[1] = start;
#endif
#else
  static const unsigned char trampoline[16] = {
    0x90, 0x00, 0x00, 0x58,	/* ldr	x16, tramp+16	*/
    0xf1, 0xff, 0xff, 0x10,	/* adr	x17, tramp+0	*/
    0x00, 0x02, 0x1f, 0xd6	/* br	x16		*/
  };
  char *tramp = closure->tramp;

  memcpy (tramp, trampoline, sizeof(trampoline));

  *(UINT64 *)(tramp + 16) = (uintptr_t)start;
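
  /* The trampoline is position independent: the ldr picks up the entry
     point stored just past the 16-byte code block (the store above),
     and the adr leaves the trampoline's own address in x17, from which
     the assembly entry recovers the enclosing ffi_closure data.  */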

  ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE);

  /* Also flush the cache for code mapping.  */
#ifdef _M_ARM64
  /* Not using dlmalloc.c for Windows ARM64 builds, so calling
     ffi_data_to_code_pointer() isn't necessary.  */
  unsigned char *tramp_code = (unsigned char *)tramp;
#else
  unsigned char *tramp_code = ffi_data_to_code_pointer (tramp);
#endif
  ffi_clear_cache (tramp_code, tramp_code + FFI_TRAMPOLINE_SIZE);
#endif

  closure->cif = cif;
  closure->fun = fun;
  closure->user_data = user_data;

  return FFI_OK;
}
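
/* A minimal usage sketch of the closure API this file implements (the
   real declarations live in ffi.h; the names below are the public
   libffi entry points, not part of this file):

     ffi_cif cif;
     ffi_type *args[1] = { &ffi_type_sint };
     void *code;
     ffi_closure *cl = ffi_closure_alloc (sizeof (ffi_closure), &code);

     ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 1, &ffi_type_sint, args);
     ffi_prep_closure_loc (cl, &cif, handler, NULL, code);
     ... call through CODE as an int(*)(int) ...
     ffi_closure_free (cl);

   where HANDLER has the signature
     void handler (ffi_cif *cif, void *ret, void **avalue, void *user_data);
*/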

#ifdef FFI_GO_CLOSURES
extern void ffi_go_closure_SYSV (void) FFI_HIDDEN;
extern void ffi_go_closure_SYSV_V (void) FFI_HIDDEN;

ffi_status
ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif* cif,
                     void (*fun)(ffi_cif*,void*,void**,void*))
{
  void (*start)(void);

  if (cif->abi != FFI_SYSV)
    return FFI_BAD_ABI;

  if (cif->flags & AARCH64_FLAG_ARG_V)
    start = ffi_go_closure_SYSV_V;
  else
    start = ffi_go_closure_SYSV;

  closure->tramp = start;
  closure->cif = cif;
  closure->fun = fun;

  return FFI_OK;
}
#endif /* FFI_GO_CLOSURES */

/* Primary handler to setup and invoke a function within a closure.

   A closure when invoked enters via the assembler wrapper
   ffi_closure_SYSV(). The wrapper allocates a call context on the
   stack, saves the interesting registers (from the perspective of
   the calling convention) into the context then passes control to
   ffi_closure_SYSV_inner() passing the saved context and a pointer to
   the stack at the point ffi_closure_SYSV() was invoked.

   On the return path the assembler wrapper will reload call context
   registers.

   ffi_closure_SYSV_inner() marshals the call context into ffi value
   descriptors, invokes the wrapped function, then marshals the return
   value back into the call context.  */
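
/* Note that the cif flags word is the return value of
   ffi_closure_SYSV_inner(); the assembler wrapper uses it to decide
   how to load the value left at RVALUE back into the machine return
   registers.  */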

int FFI_HIDDEN
ffi_closure_SYSV_inner (ffi_cif *cif,
			void (*fun)(ffi_cif*,void*,void**,void*),
			void *user_data,
			struct call_context *context,
			void *stack, void *rvalue, void *struct_rvalue)
{
  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
  int i, h, nargs, flags;
  struct arg_state state;

  arg_init (&state);

  for (i = 0, nargs = cif->nargs; i < nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];
      int t = ty->type;
      size_t n, s = ty->size;

      switch (t)
	{
	case FFI_TYPE_VOID:
	  FFI_ASSERT (0);
	  break;

	case FFI_TYPE_INT:
	case FFI_TYPE_UINT8:
	case FFI_TYPE_SINT8:
	case FFI_TYPE_UINT16:
	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	case FFI_TYPE_POINTER:
	  avalue[i] = allocate_int_to_reg_or_stack (context, &state, stack, s);
	  break;

	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
	case FFI_TYPE_LONGDOUBLE:
	case FFI_TYPE_STRUCT:
	case FFI_TYPE_COMPLEX:
	  h = is_vfp_type (ty);
	  if (h)
	    {
	      n = 4 - (h & 3);
#ifdef _M_ARM64  /* for handling armasm calling convention */
	      if (cif->is_variadic)
		{
		  if (state.ngrn + n <= N_X_ARG_REG)
		    {
		      void *reg = &context->x[state.ngrn];
		      state.ngrn += (unsigned int)n;

		      /* Eeek! We need a pointer to the structure, however the
			 homogeneous float elements are being passed in
			 individual registers, therefore for float and double
			 the structure is not represented as a contiguous
			 sequence of bytes in our saved register context.  We
			 don't need the original contents of the register
			 storage, so we reformat the structure into the same
			 memory.  */
		      avalue[i] = compress_hfa_type (reg, reg, h);
		    }
		  else
		    {
		      state.ngrn = N_X_ARG_REG;
		      state.nsrn = N_V_ARG_REG;
		      avalue[i] = allocate_to_stack (&state, stack,
						     ty->alignment, s);
		    }
		}
	      else
		{
#endif  /* for handling armasm calling convention */
		  if (state.nsrn + n <= N_V_ARG_REG)
		    {
		      void *reg = &context->v[state.nsrn];
		      state.nsrn += (unsigned int)n;
		      avalue[i] = compress_hfa_type (reg, reg, h);
		    }
		  else
		    {
		      state.nsrn = N_V_ARG_REG;
		      avalue[i] = allocate_to_stack (&state, stack,
						     ty->alignment, s);
		    }
#ifdef _M_ARM64  /* for handling armasm calling convention */
		}
#endif  /* for handling armasm calling convention */
	    }
	  else if (s > 16)
	    {
	      /* Replace composite types of size greater than 16 with a
		 pointer.  */
	      avalue[i] = *(void **)
		allocate_int_to_reg_or_stack (context, &state, stack,
					      sizeof (void *));
	    }
	  else
	    {
	      n = (s + 7) / 8;
	      if (state.ngrn + n <= N_X_ARG_REG)
		{
		  avalue[i] = &context->x[state.ngrn];
		  state.ngrn += (unsigned int)n;
		}
	      else
		{
		  state.ngrn = N_X_ARG_REG;
		  avalue[i] = allocate_to_stack (&state, stack,
						 ty->alignment, s);
		}
	    }
	  break;

	default:
	  abort();
	}

#if defined (__APPLE__)
      if (i + 1 == cif->aarch64_nfixedargs)
	{
	  state.ngrn = N_X_ARG_REG;
	  state.nsrn = N_V_ARG_REG;
	  state.allocating_variadic = 1;
	}
#endif
    }

  flags = cif->flags;
  if (flags & AARCH64_RET_IN_MEM)
    rvalue = struct_rvalue;

  fun (cif, rvalue, avalue, user_data);

  return flags;
}

#endif /* defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) */