1 /* -----------------------------------------------------------------------
2 ffi.c - Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
3 Copyright (c) 2008 Red Hat, Inc.
4
5 x86-64 Foreign Function Interface
6
7 Permission is hereby granted, free of charge, to any person obtaining
8 a copy of this software and associated documentation files (the
9 ``Software''), to deal in the Software without restriction, including
10 without limitation the rights to use, copy, modify, merge, publish,
11 distribute, sublicense, and/or sell copies of the Software, and to
12 permit persons to whom the Software is furnished to do so, subject to
13 the following conditions:
14
15 The above copyright notice and this permission notice shall be included
16 in all copies or substantial portions of the Software.
17
18 THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE.
26 ----------------------------------------------------------------------- */
27
28 #include <ffi.h>
29 #include <ffi_common.h>
30
31 #include <stdlib.h>
32 #include <stdarg.h>
33
34 #ifdef __x86_64__
35
36 #define MAX_GPR_REGS 6
37 #define MAX_SSE_REGS 8
38
39 struct register_args
40 {
41 /* Registers for argument passing. */
42 UINT64 gpr[MAX_GPR_REGS];
43 __int128_t sse[MAX_SSE_REGS];
44 };
45
46 extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
47 void *raddr, void (*fnaddr)(void), unsigned ssecount);
48
/* All references to register classes here are identical to the code in
   gcc/config/i386/i386.c.  Do *not* change one without the other.  */
51
/* Register class used for passing a given 64-bit part of an argument.
   These represent the classes documented by the psABI, with the
   exception of the SSESF and SSEDF classes, which are basically the SSE
   class; gcc just uses an SF or DFmode move instead of DImode to avoid
   reformatting penalties.

   Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half contains only padding).

   The enumerator order matters: SSE_CLASS_P relies on the SSE-family
   classes forming the contiguous range SSE .. SSEUP.  */
enum x86_64_reg_class
{
  X86_64_NO_CLASS,
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_SSE_CLASS,
  X86_64_SSESF_CLASS,
  X86_64_SSEDF_CLASS,
  X86_64_SSEUP_CLASS,
  X86_64_X87_CLASS,
  X86_64_X87UP_CLASS,
  X86_64_COMPLEX_X87_CLASS,
  X86_64_MEMORY_CLASS
};

/* Capacity of the per-argument class arrays used throughout this file.  */
#define MAX_CLASSES 4

/* Nonzero iff X is one of the SSE-family classes (SSE, SSESF, SSEDF,
   SSEUP).  Both uses of X are parenthesized so that an argument
   containing a low-precedence operator (e.g. ?:) is evaluated as a
   unit.  X is evaluated twice; do not pass expressions with side
   effects.  */
#define SSE_CLASS_P(X) \
  ((X) >= X86_64_SSE_CLASS && (X) <= X86_64_SSEUP_CLASS)
77
/* x86-64 register passing implementation.  See the x86-64 ABI for
   details.  The goal of this code is to classify each eightbyte of an
   incoming argument by register class and to assign registers
   accordingly.  */
81
82 /* Return the union class of CLASS1 and CLASS2.
83 See the x86-64 PS ABI for details. */
84
85 static enum x86_64_reg_class
merge_classes(enum x86_64_reg_class class1,enum x86_64_reg_class class2)86 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
87 {
88 /* Rule #1: If both classes are equal, this is the resulting class. */
89 if (class1 == class2)
90 return class1;
91
92 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
93 the other class. */
94 if (class1 == X86_64_NO_CLASS)
95 return class2;
96 if (class2 == X86_64_NO_CLASS)
97 return class1;
98
99 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
100 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
101 return X86_64_MEMORY_CLASS;
102
103 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
104 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
105 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
106 return X86_64_INTEGERSI_CLASS;
107 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
108 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
109 return X86_64_INTEGER_CLASS;
110
111 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
112 MEMORY is used. */
113 if (class1 == X86_64_X87_CLASS
114 || class1 == X86_64_X87UP_CLASS
115 || class1 == X86_64_COMPLEX_X87_CLASS
116 || class2 == X86_64_X87_CLASS
117 || class2 == X86_64_X87UP_CLASS
118 || class2 == X86_64_COMPLEX_X87_CLASS)
119 return X86_64_MEMORY_CLASS;
120
121 /* Rule #6: Otherwise class SSE is used. */
122 return X86_64_SSE_CLASS;
123 }
124
125 /* Classify the argument of type TYPE and mode MODE.
126 CLASSES will be filled by the register class used to pass each word
127 of the operand. The number of words is returned. In case the parameter
128 should be passed in memory, 0 is returned. As a special case for zero
129 sized containers, classes[0] will be NO_CLASS and 1 is returned.
130
131 See the x86-64 PS ABI for details.
132 */
133 static int
classify_argument(ffi_type * type,enum x86_64_reg_class classes[],size_t byte_offset)134 classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
135 size_t byte_offset)
136 {
137 switch (type->type)
138 {
139 case FFI_TYPE_UINT8:
140 case FFI_TYPE_SINT8:
141 case FFI_TYPE_UINT16:
142 case FFI_TYPE_SINT16:
143 case FFI_TYPE_UINT32:
144 case FFI_TYPE_SINT32:
145 case FFI_TYPE_UINT64:
146 case FFI_TYPE_SINT64:
147 case FFI_TYPE_POINTER:
148 if (byte_offset + type->size <= 4)
149 classes[0] = X86_64_INTEGERSI_CLASS;
150 else
151 classes[0] = X86_64_INTEGER_CLASS;
152 return 1;
153 case FFI_TYPE_FLOAT:
154 if (byte_offset == 0)
155 classes[0] = X86_64_SSESF_CLASS;
156 else
157 classes[0] = X86_64_SSE_CLASS;
158 return 1;
159 case FFI_TYPE_DOUBLE:
160 classes[0] = X86_64_SSEDF_CLASS;
161 return 1;
162 case FFI_TYPE_LONGDOUBLE:
163 classes[0] = X86_64_X87_CLASS;
164 classes[1] = X86_64_X87UP_CLASS;
165 return 2;
166 case FFI_TYPE_STRUCT:
167 {
168 const int UNITS_PER_WORD = 8;
169 int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
170 ffi_type **ptr;
171 int i;
172 enum x86_64_reg_class subclasses[MAX_CLASSES];
173
174 /* If the struct is larger than 16 bytes, pass it on the stack. */
175 if (type->size > 16)
176 return 0;
177
178 for (i = 0; i < words; i++)
179 classes[i] = X86_64_NO_CLASS;
180
181 /* Merge the fields of structure. */
182 for (ptr = type->elements; *ptr != NULL; ptr++)
183 {
184 int num;
185
186 byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
187
188 num = classify_argument (*ptr, subclasses, byte_offset % 8);
189 if (num == 0)
190 return 0;
191 for (i = 0; i < num; i++)
192 {
193 int pos = byte_offset / 8;
194 classes[i + pos] =
195 merge_classes (subclasses[i], classes[i + pos]);
196 }
197
198 byte_offset += (*ptr)->size;
199 }
200
201 /* Final merger cleanup. */
202 for (i = 0; i < words; i++)
203 {
204 /* If one class is MEMORY, everything should be passed in
205 memory. */
206 if (classes[i] == X86_64_MEMORY_CLASS)
207 return 0;
208
209 /* The X86_64_SSEUP_CLASS should be always preceded by
210 X86_64_SSE_CLASS. */
211 if (classes[i] == X86_64_SSEUP_CLASS
212 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
213 classes[i] = X86_64_SSE_CLASS;
214
215 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
216 if (classes[i] == X86_64_X87UP_CLASS
217 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
218 classes[i] = X86_64_SSE_CLASS;
219 }
220 return words;
221 }
222
223 default:
224 FFI_ASSERT(0);
225 }
226 return 0; /* Never reached. */
227 }
228
229 /* Examine the argument and return set number of register required in each
230 class. Return zero iff parameter should be passed in memory, otherwise
231 the number of registers. */
232
233 static int
examine_argument(ffi_type * type,enum x86_64_reg_class classes[MAX_CLASSES],_Bool in_return,int * pngpr,int * pnsse)234 examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
235 _Bool in_return, int *pngpr, int *pnsse)
236 {
237 int i, n, ngpr, nsse;
238
239 n = classify_argument (type, classes, 0);
240 if (n == 0)
241 return 0;
242
243 ngpr = nsse = 0;
244 for (i = 0; i < n; ++i)
245 switch (classes[i])
246 {
247 case X86_64_INTEGER_CLASS:
248 case X86_64_INTEGERSI_CLASS:
249 ngpr++;
250 break;
251 case X86_64_SSE_CLASS:
252 case X86_64_SSESF_CLASS:
253 case X86_64_SSEDF_CLASS:
254 nsse++;
255 break;
256 case X86_64_NO_CLASS:
257 case X86_64_SSEUP_CLASS:
258 break;
259 case X86_64_X87_CLASS:
260 case X86_64_X87UP_CLASS:
261 case X86_64_COMPLEX_X87_CLASS:
262 return in_return != 0;
263 default:
264 abort ();
265 }
266
267 *pngpr = ngpr;
268 *pnsse = nsse;
269
270 return n;
271 }
272
273 /* Perform machine dependent cif processing. */
274
275 ffi_status
ffi_prep_cif_machdep(ffi_cif * cif)276 ffi_prep_cif_machdep (ffi_cif *cif)
277 {
278 int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
279 enum x86_64_reg_class classes[MAX_CLASSES];
280 size_t bytes;
281
282 gprcount = ssecount = 0;
283
284 flags = cif->rtype->type;
285 if (flags != FFI_TYPE_VOID)
286 {
287 n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
288 if (n == 0)
289 {
290 /* The return value is passed in memory. A pointer to that
291 memory is the first argument. Allocate a register for it. */
292 gprcount++;
293 /* We don't have to do anything in asm for the return. */
294 flags = FFI_TYPE_VOID;
295 }
296 else if (flags == FFI_TYPE_STRUCT)
297 {
298 /* Mark which registers the result appears in. */
299 _Bool sse0 = SSE_CLASS_P (classes[0]);
300 _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
301 if (sse0 && !sse1)
302 flags |= 1 << 8;
303 else if (!sse0 && sse1)
304 flags |= 1 << 9;
305 else if (sse0 && sse1)
306 flags |= 1 << 10;
307 /* Mark the true size of the structure. */
308 flags |= cif->rtype->size << 12;
309 }
310 }
311
312 /* Go over all arguments and determine the way they should be passed.
313 If it's in a register and there is space for it, let that be so. If
314 not, add it's size to the stack byte count. */
315 for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
316 {
317 if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
318 || gprcount + ngpr > MAX_GPR_REGS
319 || ssecount + nsse > MAX_SSE_REGS)
320 {
321 long align = cif->arg_types[i]->alignment;
322
323 if (align < 8)
324 align = 8;
325
326 bytes = ALIGN(bytes, align);
327 bytes += cif->arg_types[i]->size;
328 }
329 else
330 {
331 gprcount += ngpr;
332 ssecount += nsse;
333 }
334 }
335 if (ssecount)
336 flags |= 1 << 11;
337 cif->flags = flags;
338 cif->bytes = bytes;
339
340 return FFI_OK;
341 }
342
343 void
ffi_call(ffi_cif * cif,void (* fn)(void),void * rvalue,void ** avalue)344 ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
345 {
346 enum x86_64_reg_class classes[MAX_CLASSES];
347 char *stack, *argp;
348 ffi_type **arg_types;
349 int gprcount, ssecount, ngpr, nsse, i, avn;
350 _Bool ret_in_memory;
351 struct register_args *reg_args;
352
353 /* Can't call 32-bit mode from 64-bit mode. */
354 FFI_ASSERT (cif->abi == FFI_UNIX64);
355
356 /* If the return value is a struct and we don't have a return value
357 address then we need to make one. Note the setting of flags to
358 VOID above in ffi_prep_cif_machdep. */
359 ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
360 && (cif->flags & 0xff) == FFI_TYPE_VOID);
361 if (rvalue == NULL && ret_in_memory)
362 rvalue = alloca (cif->rtype->size);
363
364 /* Allocate the space for the arguments, plus 4 words of temp space. */
365 stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
366 reg_args = (struct register_args *) stack;
367 argp = stack + sizeof (struct register_args);
368
369 gprcount = ssecount = 0;
370
371 /* If the return value is passed in memory, add the pointer as the
372 first integer argument. */
373 if (ret_in_memory)
374 reg_args->gpr[gprcount++] = (long) rvalue;
375
376 avn = cif->nargs;
377 arg_types = cif->arg_types;
378
379 for (i = 0; i < avn; ++i)
380 {
381 size_t size = arg_types[i]->size;
382 int n;
383
384 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
385 if (n == 0
386 || gprcount + ngpr > MAX_GPR_REGS
387 || ssecount + nsse > MAX_SSE_REGS)
388 {
389 long align = arg_types[i]->alignment;
390
391 /* Stack arguments are *always* at least 8 byte aligned. */
392 if (align < 8)
393 align = 8;
394
395 /* Pass this argument in memory. */
396 argp = (void *) ALIGN (argp, align);
397 memcpy (argp, avalue[i], size);
398 argp += size;
399 }
400 else
401 {
402 /* The argument is passed entirely in registers. */
403 char *a = (char *) avalue[i];
404 int j;
405
406 for (j = 0; j < n; j++, a += 8, size -= 8)
407 {
408 switch (classes[j])
409 {
410 case X86_64_INTEGER_CLASS:
411 case X86_64_INTEGERSI_CLASS:
412 reg_args->gpr[gprcount] = 0;
413 memcpy (®_args->gpr[gprcount], a, size < 8 ? size : 8);
414 gprcount++;
415 break;
416 case X86_64_SSE_CLASS:
417 case X86_64_SSEDF_CLASS:
418 reg_args->sse[ssecount++] = *(UINT64 *) a;
419 break;
420 case X86_64_SSESF_CLASS:
421 reg_args->sse[ssecount++] = *(UINT32 *) a;
422 break;
423 default:
424 abort();
425 }
426 }
427 }
428 }
429
430 ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
431 cif->flags, rvalue, fn, ssecount);
432 }
433
434
435 extern void ffi_closure_unix64(void);
436
437 ffi_status
ffi_prep_closure_loc(ffi_closure * closure,ffi_cif * cif,void (* fun)(ffi_cif *,void *,void **,void *),void * user_data,void * codeloc)438 ffi_prep_closure_loc (ffi_closure* closure,
439 ffi_cif* cif,
440 void (*fun)(ffi_cif*, void*, void**, void*),
441 void *user_data,
442 void *codeloc)
443 {
444 volatile unsigned short *tramp;
445
446 tramp = (volatile unsigned short *) &closure->tramp[0];
447
448 tramp[0] = 0xbb49; /* mov <code>, %r11 */
449 *(void * volatile *) &tramp[1] = ffi_closure_unix64;
450 tramp[5] = 0xba49; /* mov <data>, %r10 */
451 *(void * volatile *) &tramp[6] = codeloc;
452
453 /* Set the carry bit iff the function uses any sse registers.
454 This is clc or stc, together with the first byte of the jmp. */
455 tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
456
457 tramp[11] = 0xe3ff; /* jmp *%r11 */
458
459 closure->cif = cif;
460 closure->fun = fun;
461 closure->user_data = user_data;
462
463 return FFI_OK;
464 }
465
466 int
ffi_closure_unix64_inner(ffi_closure * closure,void * rvalue,struct register_args * reg_args,char * argp)467 ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
468 struct register_args *reg_args, char *argp)
469 {
470 ffi_cif *cif;
471 void **avalue;
472 ffi_type **arg_types;
473 long i, avn;
474 int gprcount, ssecount, ngpr, nsse;
475 int ret;
476
477 cif = closure->cif;
478 avalue = alloca(cif->nargs * sizeof(void *));
479 gprcount = ssecount = 0;
480
481 ret = cif->rtype->type;
482 if (ret != FFI_TYPE_VOID)
483 {
484 enum x86_64_reg_class classes[MAX_CLASSES];
485 int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
486 if (n == 0)
487 {
488 /* The return value goes in memory. Arrange for the closure
489 return value to go directly back to the original caller. */
490 rvalue = (void *) reg_args->gpr[gprcount++];
491 /* We don't have to do anything in asm for the return. */
492 ret = FFI_TYPE_VOID;
493 }
494 else if (ret == FFI_TYPE_STRUCT && n == 2)
495 {
496 /* Mark which register the second word of the structure goes in. */
497 _Bool sse0 = SSE_CLASS_P (classes[0]);
498 _Bool sse1 = SSE_CLASS_P (classes[1]);
499 if (!sse0 && sse1)
500 ret |= 1 << 8;
501 else if (sse0 && !sse1)
502 ret |= 1 << 9;
503 }
504 }
505
506 avn = cif->nargs;
507 arg_types = cif->arg_types;
508
509 for (i = 0; i < avn; ++i)
510 {
511 enum x86_64_reg_class classes[MAX_CLASSES];
512 int n;
513
514 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
515 if (n == 0
516 || gprcount + ngpr > MAX_GPR_REGS
517 || ssecount + nsse > MAX_SSE_REGS)
518 {
519 long align = arg_types[i]->alignment;
520
521 /* Stack arguments are *always* at least 8 byte aligned. */
522 if (align < 8)
523 align = 8;
524
525 /* Pass this argument in memory. */
526 argp = (void *) ALIGN (argp, align);
527 avalue[i] = argp;
528 argp += arg_types[i]->size;
529 }
530 /* If the argument is in a single register, or two consecutive
531 registers, then we can use that address directly. */
532 else if (n == 1
533 || (n == 2
534 && SSE_CLASS_P (classes[0]) == SSE_CLASS_P (classes[1])))
535 {
536 /* The argument is in a single register. */
537 if (SSE_CLASS_P (classes[0]))
538 {
539 avalue[i] = ®_args->sse[ssecount];
540 ssecount += n;
541 }
542 else
543 {
544 avalue[i] = ®_args->gpr[gprcount];
545 gprcount += n;
546 }
547 }
548 /* Otherwise, allocate space to make them consecutive. */
549 else
550 {
551 char *a = alloca (16);
552 int j;
553
554 avalue[i] = a;
555 for (j = 0; j < n; j++, a += 8)
556 {
557 if (SSE_CLASS_P (classes[j]))
558 memcpy (a, ®_args->sse[ssecount++], 8);
559 else
560 memcpy (a, ®_args->gpr[gprcount++], 8);
561 }
562 }
563 }
564
565 /* Invoke the closure. */
566 closure->fun (cif, rvalue, avalue, closure->user_data);
567
568 /* Tell assembly how to perform return type promotions. */
569 return ret;
570 }
571
572 #endif /* __x86_64__ */
573