1 /* -----------------------------------------------------------------------
2    ffi_linux64.c - Copyright (C) 2013 IBM
3                    Copyright (C) 2011 Anthony Green
4                    Copyright (C) 2011 Kyle Moffett
5                    Copyright (C) 2008 Red Hat, Inc
6                    Copyright (C) 2007, 2008 Free Software Foundation, Inc
7                    Copyright (c) 1998 Geoffrey Keating
8 
9    PowerPC Foreign Function Interface
10 
11    Permission is hereby granted, free of charge, to any person obtaining
12    a copy of this software and associated documentation files (the
13    ``Software''), to deal in the Software without restriction, including
14    without limitation the rights to use, copy, modify, merge, publish,
15    distribute, sublicense, and/or sell copies of the Software, and to
16    permit persons to whom the Software is furnished to do so, subject to
17    the following conditions:
18 
19    The above copyright notice and this permission notice shall be included
20    in all copies or substantial portions of the Software.
21 
22    THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
23    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25    IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
26    OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27    ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28    OTHER DEALINGS IN THE SOFTWARE.
29    ----------------------------------------------------------------------- */
30 
31 #include "ffi.h"
32 
33 #ifdef POWERPC64
34 #include "ffi_common.h"
35 #include "ffi_powerpc.h"
36 
37 
/* About the LINUX64 ABI.  These are the numbers of argument registers
   of each class for which ffi_prep_args64 lays out a register image:
   GPRs r3-r10, FPRs f1-f13 and vector registers v2-v13.  */
enum {
  NUM_GPR_ARG_REGISTERS64 = 8,
  NUM_FPR_ARG_REGISTERS64 = 13,
  NUM_VEC_ARG_REGISTERS64 = 12,
};
/* Number of doublewords at the top of the frame used to save r28-r31
   for the assembly call stub.  */
enum { ASM_NEEDS_REGISTERS64 = 4 };
45 
46 
47 #if HAVE_LONG_DOUBLE_VARIANT && FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
48 /* Adjust size of ffi_type_longdouble.  */
49 void FFI_HIDDEN
ffi_prep_types_linux64(ffi_abi abi)50 ffi_prep_types_linux64 (ffi_abi abi)
51 {
52   if ((abi & (FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128)) == FFI_LINUX)
53     {
54       ffi_type_longdouble.size = 8;
55       ffi_type_longdouble.alignment = 8;
56     }
57   else
58     {
59       ffi_type_longdouble.size = 16;
60       ffi_type_longdouble.alignment = 16;
61     }
62 }
63 #endif
64 
65 
66 static unsigned int
discover_homogeneous_aggregate(ffi_abi abi,const ffi_type * t,unsigned int * elnum)67 discover_homogeneous_aggregate (ffi_abi abi,
68                                 const ffi_type *t,
69                                 unsigned int *elnum)
70 {
71   switch (t->type)
72     {
73 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
74     case FFI_TYPE_LONGDOUBLE:
75       /* 64-bit long doubles are equivalent to doubles. */
76       if ((abi & FFI_LINUX_LONG_DOUBLE_128) == 0)
77         {
78           *elnum = 1;
79           return FFI_TYPE_DOUBLE;
80         }
81       /* IBM extended precision values use unaligned pairs
82          of FPRs, but according to the ABI must be considered
83          distinct from doubles. They are also limited to a
84          maximum of four members in a homogeneous aggregate. */
85       else if ((abi & FFI_LINUX_LONG_DOUBLE_IEEE128) == 0)
86         {
87           *elnum = 2;
88           return FFI_TYPE_LONGDOUBLE;
89         }
90       /* Fall through. */
91 #endif
92     case FFI_TYPE_FLOAT:
93     case FFI_TYPE_DOUBLE:
94       *elnum = 1;
95       return (int) t->type;
96 
97     case FFI_TYPE_STRUCT:;
98       {
99 	unsigned int base_elt = 0, total_elnum = 0;
100 	ffi_type **el = t->elements;
101 	while (*el)
102 	  {
103 	    unsigned int el_elt, el_elnum = 0;
104 	    el_elt = discover_homogeneous_aggregate (abi, *el, &el_elnum);
105 	    if (el_elt == 0
106 		|| (base_elt && base_elt != el_elt))
107 	      return 0;
108 	    base_elt = el_elt;
109 	    total_elnum += el_elnum;
110 #if _CALL_ELF == 2
111 	    if (total_elnum > 8)
112 	      return 0;
113 #else
114 	    if (total_elnum > 1)
115 	      return 0;
116 #endif
117 	    el++;
118 	  }
119 	*elnum = total_elnum;
120 	return base_elt;
121       }
122 
123     default:
124       return 0;
125     }
126 }
127 
128 
/* Perform machine dependent cif processing.

   Reads cif->abi, cif->rtype, cif->arg_types and cif->nargs, then
   recomputes cif->flags (starting from whatever flags the caller has
   already set) and cif->bytes, the byte size of the frame used for
   the call.

   Returns FFI_BAD_ABI when the long double bits of cif->abi ask for a
   combination rejected by the checks below, FFI_OK otherwise.  */
static ffi_status
ffi_prep_cif_linux64_core (ffi_cif *cif)
{
  ffi_type **ptr;
  unsigned bytes;
  /* Running totals of FPRs, GPR-sized doublewords and vector registers
     consumed by the arguments.  */
  unsigned i, fparg_count = 0, intarg_count = 0, vecarg_count = 0;
  unsigned flags = cif->flags;
  unsigned elt, elnum, rtype;

#if FFI_TYPE_LONGDOUBLE == FFI_TYPE_DOUBLE
  /* If compiled without long double support... */
  if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0 ||
      (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
    return FFI_BAD_ABI;
#elif !defined(__VEC__)
  /* If compiled without vector register support (used by assembly)... */
  if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
    return FFI_BAD_ABI;
#else
  /* If the IEEE128 flag is set, but long double is only 64 bits wide... */
  if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) == 0 &&
      (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
    return FFI_BAD_ABI;
#endif

  /* The machine-independent calculation of cif->bytes doesn't work
     for us.  Redo the calculation.  */
#if _CALL_ELF == 2
  /* Space for backchain, CR, LR, TOC and the asm's temp regs.  */
  bytes = (4 + ASM_NEEDS_REGISTERS64) * sizeof (long);

  /* Space for the general registers.  */
  bytes += NUM_GPR_ARG_REGISTERS64 * sizeof (long);
#else
  /* Space for backchain, CR, LR, cc/ld doubleword, TOC and the asm's temp
     regs.  */
  bytes = (6 + ASM_NEEDS_REGISTERS64) * sizeof (long);

  /* Space for the mandatory parm save area and general registers.  */
  bytes += 2 * NUM_GPR_ARG_REGISTERS64 * sizeof (long);
#endif

  /* Return value handling.  */
  rtype = cif->rtype->type;
#if _CALL_ELF == 2
  /* A homogeneous aggregate return jumps back here with rtype replaced
     by its element type, after FLAG_RETURNS_SMST has been set.  */
homogeneous:
#endif
  switch (rtype)
    {
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
    case FFI_TYPE_LONGDOUBLE:
      if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
        {
          flags |= FLAG_RETURNS_VEC;
          break;
        }
      if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
	flags |= FLAG_RETURNS_128BITS;
      /* Fall through.  */
#endif
    case FFI_TYPE_DOUBLE:
      flags |= FLAG_RETURNS_64BITS;
      /* Fall through.  */
    case FFI_TYPE_FLOAT:
      flags |= FLAG_RETURNS_FP;
      break;

    case FFI_TYPE_UINT128:
      flags |= FLAG_RETURNS_128BITS;
      /* Fall through.  */
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
    case FFI_TYPE_POINTER:
      flags |= FLAG_RETURNS_64BITS;
      break;

    case FFI_TYPE_STRUCT:
#if _CALL_ELF == 2
      elt = discover_homogeneous_aggregate (cif->abi, cif->rtype, &elnum);
      if (elt)
        {
          flags |= FLAG_RETURNS_SMST;
          rtype = elt;
          goto homogeneous;
        }
      /* Other structs of at most 16 bytes are also flagged as small
         structs.  */
      if (cif->rtype->size <= 16)
        {
          flags |= FLAG_RETURNS_SMST;
          break;
        }
#endif
      /* The struct is returned by reference: the result pointer uses
         up one GPR argument slot.  */
      intarg_count++;
      flags |= FLAG_RETVAL_REFERENCE;
      /* Fall through.  */
    case FFI_TYPE_VOID:
      flags |= FLAG_RETURNS_NOTHING;
      break;

    default:
      /* Returns 32-bit integer, or similar.  Nothing to do here.  */
      break;
    }

  /* Walk the arguments, counting the registers of each class they use
     and noting when the parameter save area is needed.  */
  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
    {
      unsigned int align;

      switch ((*ptr)->type)
	{
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
	case FFI_TYPE_LONGDOUBLE:
          if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
            {
              vecarg_count++;
              /* Align to 16 bytes, plus the 16-byte argument. */
              intarg_count = (intarg_count + 3) & ~0x1;
              if (vecarg_count > NUM_VEC_ARG_REGISTERS64)
                flags |= FLAG_ARG_NEEDS_PSAVE;
              break;
            }
	  /* A 128-bit non-IEEE long double is counted as two FPRs and
	     two doublewords: one of each here, the other by the code
	     fallen into below.  */
	  if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
	    {
	      fparg_count++;
	      intarg_count++;
	    }
	  /* Fall through.  */
#endif
	case FFI_TYPE_DOUBLE:
	case FFI_TYPE_FLOAT:
	  fparg_count++;
	  intarg_count++;
	  if (fparg_count > NUM_FPR_ARG_REGISTERS64)
	    flags |= FLAG_ARG_NEEDS_PSAVE;
	  break;

	case FFI_TYPE_STRUCT:
	  if ((cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0)
	    {
	      /* Cap the alignment at 16 bytes and convert it to
		 doublewords, the unit intarg_count is kept in.  */
	      align = (*ptr)->alignment;
	      if (align > 16)
		align = 16;
	      align = align / 8;
	      if (align > 1)
		intarg_count = FFI_ALIGN (intarg_count, align);
	    }
	  intarg_count += ((*ptr)->size + 7) / 8;
	  elt = discover_homogeneous_aggregate (cif->abi, *ptr, &elnum);
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
          if (elt == FFI_TYPE_LONGDOUBLE &&
              (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
            {
              vecarg_count += elnum;
              if (vecarg_count > NUM_VEC_ARG_REGISTERS64)
                flags |= FLAG_ARG_NEEDS_PSAVE;
              break;
            }
	  else
#endif
	  if (elt)
	    {
	      fparg_count += elnum;
	      if (fparg_count > NUM_FPR_ARG_REGISTERS64)
		flags |= FLAG_ARG_NEEDS_PSAVE;
	    }
	  else
	    {
	      if (intarg_count > NUM_GPR_ARG_REGISTERS64)
		flags |= FLAG_ARG_NEEDS_PSAVE;
	    }
	  break;

	case FFI_TYPE_POINTER:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	case FFI_TYPE_INT:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_UINT16:
	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT8:
	case FFI_TYPE_SINT8:
	  /* Everything else is passed as a 8-byte word in a GPR, either
	     the object itself or a pointer to it.  */
	  intarg_count++;
	  if (intarg_count > NUM_GPR_ARG_REGISTERS64)
	    flags |= FLAG_ARG_NEEDS_PSAVE;
	  break;
	default:
	  FFI_ASSERT (0);
	}
    }

  /* Record which register classes are in use.  FLAG_4_GPR_ARGUMENTS is
     set once more than four GPR doublewords are needed.  */
  if (fparg_count != 0)
    flags |= FLAG_FP_ARGUMENTS;
  if (intarg_count > 4)
    flags |= FLAG_4_GPR_ARGUMENTS;
  if (vecarg_count != 0)
    flags |= FLAG_VEC_ARGUMENTS;

  /* Space for the FPR registers, if needed.  */
  if (fparg_count != 0)
    bytes += NUM_FPR_ARG_REGISTERS64 * sizeof (double);
  /* Space for the vector registers, if needed, aligned to 16 bytes. */
  if (vecarg_count != 0) {
    bytes = (bytes + 15) & ~0xF;
    bytes += NUM_VEC_ARG_REGISTERS64 * sizeof (float128);
  }

  /* Stack space.  */
#if _CALL_ELF == 2
  /* Here a parameter save area is added only when flagged, and then it
     covers every argument doubleword.  */
  if ((flags & FLAG_ARG_NEEDS_PSAVE) != 0)
    bytes += intarg_count * sizeof (long);
#else
  /* The first NUM_GPR_ARG_REGISTERS64 doublewords were already
     reserved above; add room only for the overflow.  */
  if (intarg_count > NUM_GPR_ARG_REGISTERS64)
    bytes += (intarg_count - NUM_GPR_ARG_REGISTERS64) * sizeof (long);
#endif

  /* The stack space allocated needs to be a multiple of 16 bytes.  */
  bytes = (bytes + 15) & ~0xF;

  cif->flags = flags;
  cif->bytes = bytes;

  return FFI_OK;
}
355 
356 ffi_status FFI_HIDDEN
ffi_prep_cif_linux64(ffi_cif * cif)357 ffi_prep_cif_linux64 (ffi_cif *cif)
358 {
359   if ((cif->abi & FFI_LINUX) != 0)
360     cif->nfixedargs = cif->nargs;
361 #if _CALL_ELF != 2
362   else if (cif->abi == FFI_COMPAT_LINUX64)
363     {
364       /* This call is from old code.  Don't touch cif->nfixedargs
365 	 since old code will be using a smaller cif.  */
366       cif->flags |= FLAG_COMPAT;
367       /* Translate to new abi value.  */
368       cif->abi = FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128;
369     }
370 #endif
371   else
372     return FFI_BAD_ABI;
373   return ffi_prep_cif_linux64_core (cif);
374 }
375 
376 ffi_status FFI_HIDDEN
ffi_prep_cif_linux64_var(ffi_cif * cif,unsigned int nfixedargs,unsigned int ntotalargs MAYBE_UNUSED)377 ffi_prep_cif_linux64_var (ffi_cif *cif,
378 			  unsigned int nfixedargs,
379 			  unsigned int ntotalargs MAYBE_UNUSED)
380 {
381   if ((cif->abi & FFI_LINUX) != 0)
382     cif->nfixedargs = nfixedargs;
383 #if _CALL_ELF != 2
384   else if (cif->abi == FFI_COMPAT_LINUX64)
385     {
386       /* This call is from old code.  Don't touch cif->nfixedargs
387 	 since old code will be using a smaller cif.  */
388       cif->flags |= FLAG_COMPAT;
389       /* Translate to new abi value.  */
390       cif->abi = FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128;
391     }
392 #endif
393   else
394     return FFI_BAD_ABI;
395 #if _CALL_ELF == 2
396   cif->flags |= FLAG_ARG_NEEDS_PSAVE;
397 #endif
398   return ffi_prep_cif_linux64_core (cif);
399 }
400 
401 
402 /* ffi_prep_args64 is called by the assembly routine once stack space
403    has been allocated for the function's arguments.
404 
405    The stack layout we want looks like this:
406 
407    |   Ret addr from ffi_call_LINUX64	8bytes	|	higher addresses
408    |--------------------------------------------|
409    |   CR save area			8bytes	|
410    |--------------------------------------------|
411    |   Previous backchain pointer	8	|	stack pointer here
412    |--------------------------------------------|<+ <<<	on entry to
413    |   Saved r28-r31			4*8	| |	ffi_call_LINUX64
414    |--------------------------------------------| |
415    |   GPR registers r3-r10		8*8	| |
416    |--------------------------------------------| |
417    |   FPR registers f1-f13 (optional)	13*8	| |
418    |--------------------------------------------| |
419    |   VEC registers v2-v13 (optional)  12*16   | |
420    |--------------------------------------------| |
421    |   Parameter save area		        | |
422    |--------------------------------------------| |
423    |   TOC save area			8	| |
424    |--------------------------------------------| |	stack	|
425    |   Linker doubleword		8	| |	grows	|
426    |--------------------------------------------| |	down	V
427    |   Compiler doubleword		8	| |
428    |--------------------------------------------| |	lower addresses
429    |   Space for callee's LR		8	| |
430    |--------------------------------------------| |
431    |   CR save area			8	| |
432    |--------------------------------------------| |	stack pointer here
433    |   Current backchain pointer	8	|-/	during
434    |--------------------------------------------|   <<<	ffi_call_LINUX64
435 
436 */
437 
/* Copy the argument values of ECIF into the frame that starts at STACK
   and spans ecif->cif->bytes bytes: values that go in registers are
   written to the GPR, FPR and vector register image areas near the top
   of the frame, the others to the parameter save area.  Relies on the
   flags and byte count stored in the cif.  */
void FFI_HIDDEN
ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
{
  const unsigned long bytes = ecif->cif->bytes;
  const unsigned long flags = ecif->cif->flags;

  /* One address viewed as a pointer to each element size used below,
     or as an integer ('p') for alignment arithmetic.  */
  typedef union
  {
    char *c;
    unsigned long *ul;
    float *f;
    double *d;
    float128 *f128;
    size_t p;
  } valp;

  /* 'stacktop' points at the previous backchain pointer.  */
  valp stacktop;

  /* 'gpr_base' points at the space for gpr3 and 'gpr_end' just past
     the space for the last GPR argument.  'next_arg' starts at
     gpr_base and grows upwards as we use GPR registers, then
     continues at 'rest'.  */
  valp gpr_base;
  valp gpr_end;
  valp rest;
  valp next_arg;

  /* 'fpr_base' points at the space for f1, and grows upwards as
     we use FPR registers.  */
  valp fpr_base;
  unsigned int fparg_count;

  /* 'vec_base' points at the space for v2, and grows upwards as
     we use vector registers.  */
  valp vec_base;
  unsigned int vecarg_count;

  unsigned int i, words, nargs, nfixedargs;
  ffi_type **ptr;
  double double_tmp;
  /* Cursor over ecif->avalue, viewed as a pointer to each possible
     argument type.  */
  union
  {
    void **v;
    char **c;
    signed char **sc;
    unsigned char **uc;
    signed short **ss;
    unsigned short **us;
    signed int **si;
    unsigned int **ui;
    unsigned long **ul;
    float **f;
    double **d;
    float128 **f128;
  } p_argv;
  unsigned long gprvalue;
  unsigned long align;

  stacktop.c = (char *) stack + bytes;
  gpr_base.ul = stacktop.ul - ASM_NEEDS_REGISTERS64 - NUM_GPR_ARG_REGISTERS64;
  gpr_end.ul = gpr_base.ul + NUM_GPR_ARG_REGISTERS64;
  /* 'rest' lies past the fixed doublewords at the bottom of the frame
     (4 or 6, depending on the ABI) plus one doubleword for each GPR
     argument.  */
#if _CALL_ELF == 2
  rest.ul = stack + 4 + NUM_GPR_ARG_REGISTERS64;
#else
  rest.ul = stack + 6 + NUM_GPR_ARG_REGISTERS64;
#endif
  fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS64;
  fparg_count = 0;
  /* Place the vector args below the FPRs, if used, else the GPRs. */
  if (ecif->cif->flags & FLAG_FP_ARGUMENTS)
    vec_base.p = fpr_base.p & ~0xF;
  else
    vec_base.p = gpr_base.p;
  vec_base.f128 -= NUM_VEC_ARG_REGISTERS64;
  vecarg_count = 0;
  next_arg.ul = gpr_base.ul;

  /* Check that everything starts aligned properly.  */
  FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
  FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
  FFI_ASSERT (((unsigned long) gpr_base.c & 0xF) == 0);
  FFI_ASSERT (((unsigned long) gpr_end.c  & 0xF) == 0);
  FFI_ASSERT (((unsigned long) vec_base.c & 0xF) == 0);
  FFI_ASSERT ((bytes & 0xF) == 0);

  /* Deal with return values that are actually pass-by-reference.  */
  if (flags & FLAG_RETVAL_REFERENCE)
    *next_arg.ul++ = (unsigned long) (char *) ecif->rvalue;

  /* Now for the arguments.  */
  p_argv.v = ecif->avalue;
  nargs = ecif->cif->nargs;
  /* With FLAG_COMPAT set, nfixedargs keeps the value (unsigned) -1, so
     the 'i < nfixedargs' tests below hold for every argument.  */
#if _CALL_ELF != 2
  nfixedargs = (unsigned) -1;
  if ((flags & FLAG_COMPAT) == 0)
#endif
    nfixedargs = ecif->cif->nfixedargs;
  for (ptr = ecif->cif->arg_types, i = 0;
       i < nargs;
       i++, ptr++, p_argv.v++)
    {
      unsigned int elt, elnum;

      switch ((*ptr)->type)
	{
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
	case FFI_TYPE_LONGDOUBLE:
          if ((ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
            {
              /* A fixed argument goes to the vector register image
                 while registers remain; otherwise it is stored at the
                 16-byte aligned next_arg.  next_arg advances either
                 way.  */
              next_arg.p = FFI_ALIGN (next_arg.p, 16);
              if (next_arg.ul == gpr_end.ul)
                next_arg.ul = rest.ul;
              if (vecarg_count < NUM_VEC_ARG_REGISTERS64 && i < nfixedargs)
                *vec_base.f128++ = **p_argv.f128;
              else
                *next_arg.f128 = **p_argv.f128;
              if (++next_arg.f128 == gpr_end.f128)
                next_arg.f128 = rest.f128;
              vecarg_count++;
              FFI_ASSERT (__LDBL_MANT_DIG__ == 113);
              FFI_ASSERT (flags & FLAG_VEC_ARGUMENTS);
              break;
            }
	  if ((ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
	    {
	      /* Handle the value as two consecutive doubles, each
		 placed like a double argument.  */
	      double_tmp = (*p_argv.d)[0];
	      if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
		{
		  *fpr_base.d++ = double_tmp;
# if _CALL_ELF != 2
		  if ((flags & FLAG_COMPAT) != 0)
		    *next_arg.d = double_tmp;
# endif
		}
	      else
		*next_arg.d = double_tmp;
	      if (++next_arg.ul == gpr_end.ul)
		next_arg.ul = rest.ul;
	      fparg_count++;
	      double_tmp = (*p_argv.d)[1];
	      if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
		{
		  *fpr_base.d++ = double_tmp;
# if _CALL_ELF != 2
		  if ((flags & FLAG_COMPAT) != 0)
		    *next_arg.d = double_tmp;
# endif
		}
	      else
		*next_arg.d = double_tmp;
	      if (++next_arg.ul == gpr_end.ul)
		next_arg.ul = rest.ul;
	      fparg_count++;
	      FFI_ASSERT (__LDBL_MANT_DIG__ == 106);
	      FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
	      break;
	    }
	  /* Fall through.  */
#endif
	case FFI_TYPE_DOUBLE:
#if _CALL_ELF != 2
	do_double:
#endif
	  /* A fixed argument goes to the FPR image while FPRs remain
	     (and, with FLAG_COMPAT, also to its next_arg slot);
	     otherwise it is stored at next_arg only.  */
	  double_tmp = **p_argv.d;
	  if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
	    {
	      *fpr_base.d++ = double_tmp;
#if _CALL_ELF != 2
	      if ((flags & FLAG_COMPAT) != 0)
		*next_arg.d = double_tmp;
#endif
	    }
	  else
	    *next_arg.d = double_tmp;
	  if (++next_arg.ul == gpr_end.ul)
	    next_arg.ul = rest.ul;
	  fparg_count++;
	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
	  break;

	case FFI_TYPE_FLOAT:
#if _CALL_ELF != 2
	do_float:
#endif
	  /* The FPR image holds the value widened to double.  A copy
	     stored at next_arg stays a float, in the second word of
	     the doubleword on big-endian and the first on
	     little-endian.  */
	  double_tmp = **p_argv.f;
	  if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
	    {
	      *fpr_base.d++ = double_tmp;
#if _CALL_ELF != 2
	      if ((flags & FLAG_COMPAT) != 0)
		{
# ifndef __LITTLE_ENDIAN__
		  next_arg.f[1] = (float) double_tmp;
# else
		  next_arg.f[0] = (float) double_tmp;
# endif
		}
#endif
	    }
	  else
	    {
# ifndef __LITTLE_ENDIAN__
	      next_arg.f[1] = (float) double_tmp;
# else
	      next_arg.f[0] = (float) double_tmp;
# endif
	    }
	  if (++next_arg.ul == gpr_end.ul)
	    next_arg.ul = rest.ul;
	  fparg_count++;
	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
	  break;

	case FFI_TYPE_STRUCT:
	  if ((ecif->cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0)
	    {
	      /* Align next_arg to the struct's alignment, capped at
		 16 bytes.  */
	      align = (*ptr)->alignment;
	      if (align > 16)
		align = 16;
	      if (align > 1)
                {
                  next_arg.p = FFI_ALIGN (next_arg.p, align);
                  if (next_arg.ul == gpr_end.ul)
                    next_arg.ul = rest.ul;
                }
	    }
	  elt = discover_homogeneous_aggregate (ecif->cif->abi, *ptr, &elnum);
	  if (elt)
	    {
#if _CALL_ELF == 2
	      /* Read cursor over the members of the aggregate.  */
	      union {
		void *v;
		float *f;
		double *d;
		float128 *f128;
	      } arg;

	      arg.v = *p_argv.v;
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
              if (elt == FFI_TYPE_LONGDOUBLE &&
                  (ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
                {
                  do
                    {
                      if (vecarg_count < NUM_VEC_ARG_REGISTERS64
                          && i < nfixedargs)
                        *vec_base.f128++ = *arg.f128++;
                      else
                        *next_arg.f128 = *arg.f128++;
                      if (++next_arg.f128 == gpr_end.f128)
                        next_arg.f128 = rest.f128;
                      vecarg_count++;
                    }
                  while (--elnum != 0);
                }
              else
#endif
	      if (elt == FFI_TYPE_FLOAT)
		{
		  do
		    {
		      double_tmp = *arg.f++;
		      if (fparg_count < NUM_FPR_ARG_REGISTERS64
			  && i < nfixedargs)
			*fpr_base.d++ = double_tmp;
		      else
			*next_arg.f = (float) double_tmp;
		      if (++next_arg.f == gpr_end.f)
			next_arg.f = rest.f;
		      fparg_count++;
		    }
		  while (--elnum != 0);
		  /* An odd number of floats leaves next_arg in the
		     middle of a doubleword: skip to the next one.  */
		  if ((next_arg.p & 7) != 0)
                    if (++next_arg.f == gpr_end.f)
                      next_arg.f = rest.f;
		}
	      else
		do
		  {
		    double_tmp = *arg.d++;
		    if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
		      *fpr_base.d++ = double_tmp;
		    else
		      *next_arg.d = double_tmp;
		    if (++next_arg.d == gpr_end.d)
		      next_arg.d = rest.d;
		    fparg_count++;
		  }
		while (--elnum != 0);
#else
	      /* discover_homogeneous_aggregate accepts only
		 single-element aggregates for this ABI, so handle the
		 struct as a scalar of its element type.  */
	      if (elt == FFI_TYPE_FLOAT)
		goto do_float;
	      else
		goto do_double;
#endif
	    }
	  else
	    {
	      words = ((*ptr)->size + 7) / 8;
	      if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul)
		{
		  /* The struct straddles the end of the GPR area: copy
		     the part that fits, put the remainder at 'rest',
		     and continue from there.  */
		  size_t first = gpr_end.c - next_arg.c;
		  memcpy (next_arg.c, *p_argv.c, first);
		  memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first);
		  next_arg.c = rest.c + words * 8 - first;
		}
	      else
		{
		  char *where = next_arg.c;

#ifndef __LITTLE_ENDIAN__
		  /* Structures with size less than eight bytes are passed
		     left-padded.  */
		  if ((*ptr)->size < 8)
		    where += 8 - (*ptr)->size;
#endif
		  memcpy (where, *p_argv.c, (*ptr)->size);
		  next_arg.ul += words;
		  if (next_arg.ul == gpr_end.ul)
		    next_arg.ul = rest.ul;
		}
	    }
	  break;

	/* Integer and pointer arguments: widen the value to a full
	   doubleword in gprvalue, then store it at putgpr.  */
	case FFI_TYPE_UINT8:
	  gprvalue = **p_argv.uc;
	  goto putgpr;
	case FFI_TYPE_SINT8:
	  gprvalue = **p_argv.sc;
	  goto putgpr;
	case FFI_TYPE_UINT16:
	  gprvalue = **p_argv.us;
	  goto putgpr;
	case FFI_TYPE_SINT16:
	  gprvalue = **p_argv.ss;
	  goto putgpr;
	case FFI_TYPE_UINT32:
	  gprvalue = **p_argv.ui;
	  goto putgpr;
	case FFI_TYPE_INT:
	case FFI_TYPE_SINT32:
	  gprvalue = **p_argv.si;
	  goto putgpr;

	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	case FFI_TYPE_POINTER:
	  gprvalue = **p_argv.ul;
	putgpr:
	  *next_arg.ul++ = gprvalue;
	  if (next_arg.ul == gpr_end.ul)
	    next_arg.ul = rest.ul;
	  break;
	}
    }

  /* Unless FLAG_4_GPR_ARGUMENTS is set, next_arg must not have moved
     past the first four GPR slots.  */
  FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS
	      || (next_arg.ul >= gpr_base.ul
		  && next_arg.ul <= gpr_base.ul + 4));
}
797 
798 
799 #if _CALL_ELF == 2
800 #define MIN_CACHE_LINE_SIZE 8
801 
802 static void
flush_icache(char * wraddr,char * xaddr,int size)803 flush_icache (char *wraddr, char *xaddr, int size)
804 {
805   int i;
806   for (i = 0; i < size; i += MIN_CACHE_LINE_SIZE)
807     __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;"
808 		      : : "r" (xaddr + i), "r" (wraddr + i) : "memory");
809   __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;" "sync;" "isync;"
810 		    : : "r"(xaddr + size - 1), "r"(wraddr + size - 1)
811 		    : "memory");
812 }
813 #endif
814 
815 
816 ffi_status FFI_HIDDEN
ffi_prep_closure_loc_linux64(ffi_closure * closure,ffi_cif * cif,void (* fun)(ffi_cif *,void *,void **,void *),void * user_data,void * codeloc)817 ffi_prep_closure_loc_linux64 (ffi_closure *closure,
818 			      ffi_cif *cif,
819 			      void (*fun) (ffi_cif *, void *, void **, void *),
820 			      void *user_data,
821 			      void *codeloc)
822 {
823 #if _CALL_ELF == 2
824   unsigned int *tramp = (unsigned int *) &closure->tramp[0];
825 
826   if (cif->abi < FFI_LINUX || cif->abi >= FFI_LAST_ABI)
827     return FFI_BAD_ABI;
828 
829   tramp[0] = 0xe96c0018;	/* 0:	ld	11,2f-0b(12)	*/
830   tramp[1] = 0xe98c0010;	/*	ld	12,1f-0b(12)	*/
831   tramp[2] = 0x7d8903a6;	/*	mtctr	12		*/
832   tramp[3] = 0x4e800420;	/*	bctr			*/
833 				/* 1:	.quad	function_addr	*/
834 				/* 2:	.quad	context		*/
835   *(void **) &tramp[4] = (void *) ffi_closure_LINUX64;
836   *(void **) &tramp[6] = codeloc;
837   flush_icache ((char *) tramp, (char *) codeloc, 4 * 4);
838 #else
839   void **tramp = (void **) &closure->tramp[0];
840 
841   if (cif->abi < FFI_LINUX || cif->abi >= FFI_LAST_ABI)
842     return FFI_BAD_ABI;
843 
844   /* Copy function address and TOC from ffi_closure_LINUX64 OPD.  */
845   memcpy (&tramp[0], (void **) ffi_closure_LINUX64, sizeof (void *));
846   tramp[1] = codeloc;
847   memcpy (&tramp[2], (void **) ffi_closure_LINUX64 + 1, sizeof (void *));
848 #endif
849 
850   closure->cif = cif;
851   closure->fun = fun;
852   closure->user_data = user_data;
853 
854   return FFI_OK;
855 }
856 
857 
858 int FFI_HIDDEN
ffi_closure_helper_LINUX64(ffi_cif * cif,void (* fun)(ffi_cif *,void *,void **,void *),void * user_data,void * rvalue,unsigned long * pst,ffi_dblfl * pfr,float128 * pvec)859 ffi_closure_helper_LINUX64 (ffi_cif *cif,
860 			    void (*fun) (ffi_cif *, void *, void **, void *),
861 			    void *user_data,
862 			    void *rvalue,
863 			    unsigned long *pst,
864                             ffi_dblfl *pfr,
865                             float128 *pvec)
866 {
867   /* rvalue is the pointer to space for return value in closure assembly */
868   /* pst is the pointer to parameter save area
869      (r3-r10 are stored into its first 8 slots by ffi_closure_LINUX64) */
870   /* pfr is the pointer to where f1-f13 are stored in ffi_closure_LINUX64 */
871   /* pvec is the pointer to where v2-v13 are stored in ffi_closure_LINUX64 */
872 
873   void **avalue;
874   ffi_type **arg_types;
875   unsigned long i, avn, nfixedargs;
876   ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS64;
877   float128 *end_pvec = pvec + NUM_VEC_ARG_REGISTERS64;
878   unsigned long align;
879 
880   avalue = alloca (cif->nargs * sizeof (void *));
881 
882   /* Copy the caller's structure return value address so that the
883      closure returns the data directly to the caller.  */
884   if (cif->rtype->type == FFI_TYPE_STRUCT
885       && (cif->flags & FLAG_RETURNS_SMST) == 0)
886     {
887       rvalue = (void *) *pst;
888       pst++;
889     }
890 
891   i = 0;
892   avn = cif->nargs;
893 #if _CALL_ELF != 2
894   nfixedargs = (unsigned) -1;
895   if ((cif->flags & FLAG_COMPAT) == 0)
896 #endif
897     nfixedargs = cif->nfixedargs;
898   arg_types = cif->arg_types;
899 
900   /* Grab the addresses of the arguments from the stack frame.  */
901   while (i < avn)
902     {
903       unsigned int elt, elnum;
904 
905       switch (arg_types[i]->type)
906 	{
907 	case FFI_TYPE_SINT8:
908 	case FFI_TYPE_UINT8:
909 #ifndef __LITTLE_ENDIAN__
910 	  avalue[i] = (char *) pst + 7;
911 	  pst++;
912 	  break;
913 #endif
914 
915 	case FFI_TYPE_SINT16:
916 	case FFI_TYPE_UINT16:
917 #ifndef __LITTLE_ENDIAN__
918 	  avalue[i] = (char *) pst + 6;
919 	  pst++;
920 	  break;
921 #endif
922 
923 	case FFI_TYPE_SINT32:
924 	case FFI_TYPE_UINT32:
925 #ifndef __LITTLE_ENDIAN__
926 	  avalue[i] = (char *) pst + 4;
927 	  pst++;
928 	  break;
929 #endif
930 
931 	case FFI_TYPE_SINT64:
932 	case FFI_TYPE_UINT64:
933 	case FFI_TYPE_POINTER:
934 	  avalue[i] = pst;
935 	  pst++;
936 	  break;
937 
938 	case FFI_TYPE_STRUCT:
939 	  if ((cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0)
940 	    {
941 	      align = arg_types[i]->alignment;
942 	      if (align > 16)
943 		align = 16;
944 	      if (align > 1)
945 		pst = (unsigned long *) FFI_ALIGN ((size_t) pst, align);
946 	    }
947 	  elt = discover_homogeneous_aggregate (cif->abi, arg_types[i], &elnum);
948 	  if (elt)
949 	    {
950 #if _CALL_ELF == 2
951 	      union {
952 		void *v;
953 		unsigned long *ul;
954 		float *f;
955 		double *d;
956 		float128 *f128;
957 		size_t p;
958 	      } to, from;
959 
960 	      /* Repackage the aggregate from its parts.  The
961 		 aggregate size is not greater than the space taken by
962 		 the registers so store back to the register/parameter
963 		 save arrays.  */
964 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
965               if (elt == FFI_TYPE_LONGDOUBLE &&
966                   (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
967                 {
968                   if (pvec + elnum <= end_pvec)
969                     to.v = pvec;
970                   else
971                     to.v = pst;
972                 }
973               else
974 #endif
975 	      if (pfr + elnum <= end_pfr)
976 		to.v = pfr;
977 	      else
978 		to.v = pst;
979 
980 	      avalue[i] = to.v;
981 	      from.ul = pst;
982 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
983               if (elt == FFI_TYPE_LONGDOUBLE &&
984                   (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
985                 {
986                   do
987                     {
988                       if (pvec < end_pvec && i < nfixedargs)
989                         *to.f128 = *pvec++;
990                       else
991                         *to.f128 = *from.f128;
992                       to.f128++;
993                       from.f128++;
994                     }
995                   while (--elnum != 0);
996                 }
997               else
998 #endif
999 	      if (elt == FFI_TYPE_FLOAT)
1000 		{
1001 		  do
1002 		    {
1003 		      if (pfr < end_pfr && i < nfixedargs)
1004 			{
1005 			  *to.f = (float) pfr->d;
1006 			  pfr++;
1007 			}
1008 		      else
1009 			*to.f = *from.f;
1010 		      to.f++;
1011 		      from.f++;
1012 		    }
1013 		  while (--elnum != 0);
1014 		}
1015 	      else
1016 		{
1017 		  do
1018 		    {
1019 		      if (pfr < end_pfr && i < nfixedargs)
1020 			{
1021 			  *to.d = pfr->d;
1022 			  pfr++;
1023 			}
1024 		      else
1025 			*to.d = *from.d;
1026 		      to.d++;
1027 		      from.d++;
1028 		    }
1029 		  while (--elnum != 0);
1030 		}
1031 #else
1032 	      if (elt == FFI_TYPE_FLOAT)
1033 		goto do_float;
1034 	      else
1035 		goto do_double;
1036 #endif
1037 	    }
1038 	  else
1039 	    {
1040 #ifndef __LITTLE_ENDIAN__
1041 	      /* Structures with size less than eight bytes are passed
1042 		 left-padded.  */
1043 	      if (arg_types[i]->size < 8)
1044 		avalue[i] = (char *) pst + 8 - arg_types[i]->size;
1045 	      else
1046 #endif
1047 		avalue[i] = pst;
1048 	    }
1049 	  pst += (arg_types[i]->size + 7) / 8;
1050 	  break;
1051 
1052 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
1053 	case FFI_TYPE_LONGDOUBLE:
1054           if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
1055             {
1056               if (((unsigned long) pst & 0xF) != 0)
1057                 ++pst;
1058               if (pvec < end_pvec && i < nfixedargs)
1059                 avalue[i] = pvec++;
1060               else
1061                 avalue[i] = pst;
1062               pst += 2;
1063               break;
1064             }
1065           else if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
1066 	    {
1067 	      if (pfr + 1 < end_pfr && i + 1 < nfixedargs)
1068 		{
1069 		  avalue[i] = pfr;
1070 		  pfr += 2;
1071 		}
1072 	      else
1073 		{
1074 		  if (pfr < end_pfr && i < nfixedargs)
1075 		    {
1076 		      /* Passed partly in f13 and partly on the stack.
1077 			 Move it all to the stack.  */
1078 		      *pst = *(unsigned long *) pfr;
1079 		      pfr++;
1080 		    }
1081 		  avalue[i] = pst;
1082 		}
1083 	      pst += 2;
1084 	      break;
1085 	    }
1086 	  /* Fall through.  */
1087 #endif
1088 	case FFI_TYPE_DOUBLE:
1089 #if _CALL_ELF != 2
1090 	do_double:
1091 #endif
	  /* On the outgoing stack all values are aligned to 8 bytes.
	     There are 13 64-bit floating point registers.  */
1094 
1095 	  if (pfr < end_pfr && i < nfixedargs)
1096 	    {
1097 	      avalue[i] = pfr;
1098 	      pfr++;
1099 	    }
1100 	  else
1101 	    avalue[i] = pst;
1102 	  pst++;
1103 	  break;
1104 
1105 	case FFI_TYPE_FLOAT:
1106 #if _CALL_ELF != 2
1107 	do_float:
1108 #endif
1109 	  if (pfr < end_pfr && i < nfixedargs)
1110 	    {
1111 	      /* Float values are stored as doubles in the
1112 		 ffi_closure_LINUX64 code.  Fix them here.  */
1113 	      pfr->f = (float) pfr->d;
1114 	      avalue[i] = pfr;
1115 	      pfr++;
1116 	    }
1117 	  else
1118 	    {
1119 #ifndef __LITTLE_ENDIAN__
1120 	      avalue[i] = (char *) pst + 4;
1121 #else
1122 	      avalue[i] = pst;
1123 #endif
1124 	    }
1125 	  pst++;
1126 	  break;
1127 
1128 	default:
1129 	  FFI_ASSERT (0);
1130 	}
1131 
1132       i++;
1133     }
1134 
1135   (*fun) (cif, rvalue, avalue, user_data);
1136 
1137   /* Tell ffi_closure_LINUX64 how to perform return type promotions.  */
1138   if ((cif->flags & FLAG_RETURNS_SMST) != 0)
1139     {
1140       if ((cif->flags & (FLAG_RETURNS_FP | FLAG_RETURNS_VEC)) == 0)
1141 	return FFI_V2_TYPE_SMALL_STRUCT + cif->rtype->size - 1;
1142       else if ((cif->flags & FLAG_RETURNS_VEC) != 0)
1143         return FFI_V2_TYPE_VECTOR_HOMOG;
1144       else if ((cif->flags & FLAG_RETURNS_64BITS) != 0)
1145 	return FFI_V2_TYPE_DOUBLE_HOMOG;
1146       else
1147 	return FFI_V2_TYPE_FLOAT_HOMOG;
1148     }
1149   if ((cif->flags & FLAG_RETURNS_VEC) != 0)
1150     return FFI_V2_TYPE_VECTOR;
1151   return cif->rtype->type;
1152 }
1153 #endif
1154