/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
``Software''), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */

/* libffi low-level support for Windows on AArch64, written in Microsoft
   armasm64 syntax.  The entry/exit macros (NESTED_ENTRY,
   PROLOG_SAVE_REG_PAIR, EPILOG_RESTORE_REG_PAIR, EPILOG_RETURN, ...)
   come from ksarm64.h and emit the unwind metadata Windows requires.

   Return-type dispatch in this file is done with computed branch
   tables: an AARCH64_RET_* code is masked out of the flags word and
   scaled by the fixed size of one table entry, then branched to with
   `br`.  Entry sizes are therefore load-bearing: the two return tables
   below use 2 instructions (8 bytes) per entry, and the HFA tables use
   4 instructions (16 bytes) per entry.  Do not add or remove
   instructions inside a table without re-padding with nops.  */

#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
#include <ffi_cfi.h>
#include "internal.h"

	OPT	2	/* disable listing */
/* For some macros to add unwind information */
#include "ksarm64.h"
	OPT	1	/* re-enable listing */

/* Little-endian build: sub-word values sit at byte offset 0 of their
   64-bit ffi_arg slot, so the big-endian offset adjustment is a no-op.  */
#define BE(X)	0
#define PTR_REG(n)	x##n
#define PTR_SIZE	8

	IMPORT ffi_closure_SYSV_inner
	EXPORT ffi_call_SYSV
	EXPORT ffi_closure_SYSV_V
	EXPORT ffi_closure_SYSV
	EXPORT extend_hfa_type
	EXPORT compress_hfa_type
#ifdef FFI_GO_CLOSURES
	EXPORT ffi_go_closure_SYSV_V
	EXPORT ffi_go_closure_SYSV
#endif

	/* NOTE(review): "ALLIGN" (double L) is presumably a typo for
	   ALIGN carried from upstream; it appears to be accepted by the
	   TEXTAREA macro.  Confirm against ksarm64.h before changing.  */
	TEXTAREA, ALLIGN=8

/* ffi_call_SYSV
   extern void ffi_call_SYSV (void *stack, void *frame,
			      void (*fn)(void), void *rvalue,
			      int flags, void *closure);
   Therefore on entry we have:
   x0 stack
   x1 frame
   x2 fn
   x3 rvalue
   x4 flags
   x5 closure
*/

	/* ffi_call_SYSV_fake exists only so that the Windows unwinder
	   sees a prologue that saved fp/lr; the real entry point is the
	   ALTERNATE_ENTRY below, which reuses a frame the C caller
	   already allocated (x1).  */
	NESTED_ENTRY ffi_call_SYSV_fake

	/* For unwind information, Windows has to store fp and lr */
	PROLOG_SAVE_REG_PAIR	x29, x30, #-32!

	ALTERNATE_ENTRY ffi_call_SYSV
	/* Use a stack frame allocated by our caller.  */
	stp	x29, x30, [x1]
	mov	x29, x1
	mov	sp, x0			/* switch to the prepared argument context */

	mov	x9, x2			/* save fn */
	mov	x8, x3			/* install structure return */
#ifdef FFI_GO_CLOSURES
	/* x18 is the platform-reserved register on Windows ARM64, so the
	   static chain cannot be installed here the way the SysV port
	   does it.  */
	/*mov	x18, x5			install static chain */
#endif
	stp	x3, x4, [x29, #16]	/* save rvalue and flags */

	/* Load the vector argument passing registers, if necessary
	   (the context starts with 8 x 16-byte V-register slots).  */
	tbz	x4, #AARCH64_FLAG_ARG_V_BIT, ffi_call_SYSV_L1
	ldp	q0, q1, [sp, #0]
	ldp	q2, q3, [sp, #32]
	ldp	q4, q5, [sp, #64]
	ldp	q6, q7, [sp, #96]

ffi_call_SYSV_L1
	/* Load the core argument passing registers, including
	   the structure return pointer.  */
	ldp	x0, x1, [sp, #16*N_V_ARG_REG + 0]
	ldp	x2, x3, [sp, #16*N_V_ARG_REG + 16]
	ldp	x4, x5, [sp, #16*N_V_ARG_REG + 32]
	ldp	x6, x7, [sp, #16*N_V_ARG_REG + 48]

	/* Deallocate the context, leaving the stacked arguments.  */
	add	sp, sp, #CALL_CONTEXT_SIZE

	blr	x9			/* call fn */

	ldp	x3, x4, [x29, #16]	/* reload rvalue and flags */

	/* Partially deconstruct the stack frame.  */
	mov	sp, x29
	ldp	x29, x30, [x29]

	/* Save the return value as directed: index the table below by the
	   AARCH64_RET_* code in the low bits of flags, 8 bytes/entry.  */
	adr	x5, ffi_call_SYSV_return
	and	w4, w4, #AARCH64_RET_MASK
	add	x5, x5, x4, lsl #3
	br	x5

	/* Note that each table entry is 2 insns, and thus 8 bytes.
	   For integer data, note that we're storing into ffi_arg
	   and therefore we want to extend to 64 bits; these types
	   have two consecutive entries allocated for them.  */
	ALIGN	4
ffi_call_SYSV_return
	ret				/* VOID */
	nop
	str	x0, [x3]		/* INT64 */
	ret
	stp	x0, x1, [x3]		/* INT128 */
	ret
	brk	#1000			/* UNUSED */
	ret
	brk	#1000			/* UNUSED */
	ret
	brk	#1000			/* UNUSED */
	ret
	brk	#1000			/* UNUSED */
	ret
	brk	#1000			/* UNUSED */
	ret
	/* HFA returns: stN single-lane stores write element 0 of each of
	   the first N vector result registers contiguously to *rvalue.  */
	st4	{ v0.s, v1.s, v2.s, v3.s }[0], [x3]	/* S4 */
	ret
	st3	{ v0.s, v1.s, v2.s }[0], [x3]		/* S3 */
	ret
	stp	s0, s1, [x3]		/* S2 */
	ret
	str	s0, [x3]		/* S1 */
	ret
	st4	{ v0.d, v1.d, v2.d, v3.d }[0], [x3]	/* D4 */
	ret
	st3	{ v0.d, v1.d, v2.d }[0], [x3]		/* D3 */
	ret
	stp	d0, d1, [x3]		/* D2 */
	ret
	str	d0, [x3]		/* D1 */
	ret
	/* Q4 and Q3 deliberately fall through (nop, no ret) into the Q2
	   entry, so Q4 stores q3,q2,q1,q0 and Q3 stores q2,q1,q0.  */
	str	q3, [x3, #48]		/* Q4 */
	nop
	str	q2, [x3, #32]		/* Q3 */
	nop
	stp	q0, q1, [x3]		/* Q2 */
	ret
	str	q0, [x3]		/* Q1 */
	ret
	/* Small integers: zero/sign extend into the full ffi_arg slot;
	   each of these types owns two consecutive 8-byte entries.  */
	uxtb	w0, w0			/* UINT8 */
	str	x0, [x3]
	ret				/* reserved */
	nop
	uxth	w0, w0			/* UINT16 */
	str	x0, [x3]
	ret				/* reserved */
	nop
	mov	w0, w0			/* UINT32 */
	str	x0, [x3]
	ret				/* reserved */
	nop
	sxtb	x0, w0			/* SINT8 */
	str	x0, [x3]
	ret				/* reserved */
	nop
	sxth	x0, w0			/* SINT16 */
	str	x0, [x3]
	ret				/* reserved */
	nop
	sxtw	x0, w0			/* SINT32 */
	str	x0, [x3]
	ret				/* reserved */
	nop


	NESTED_END ffi_call_SYSV_fake


/* ffi_closure_SYSV
   Closure invocation glue. This is the low level code invoked directly by
   the closure trampoline to setup and call a closure.
   On entry x17 points to a struct ffi_closure, x16 has been clobbered
   all other registers are preserved.
   We allocate a call context and save the argument passing registers,
   then invoked the generic C ffi_closure_SYSV_inner() function to do all
   the real work, on return we load the result passing registers back from
   the call context.
*/

/* Frame size: fp/lr pair + the register-save call context + 64 bytes
   of scratch for the return value.  */
#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64)

	/* Variant used when the closure's signature has vector (FP/SIMD)
	   arguments: saves q0-q7 first, then joins the common path.  */
	NESTED_ENTRY ffi_closure_SYSV_V
	PROLOG_SAVE_REG_PAIR	x29, x30, #-ffi_closure_SYSV_FS!

	/* Save the argument passing vector registers.  */
	stp	q0, q1, [sp, #16 + 0]
	stp	q2, q3, [sp, #16 + 32]
	stp	q4, q5, [sp, #16 + 64]
	stp	q6, q7, [sp, #16 + 96]

	b	ffi_closure_SYSV_save_argument
	NESTED_END ffi_closure_SYSV_V

	NESTED_ENTRY ffi_closure_SYSV
	PROLOG_SAVE_REG_PAIR	x29, x30, #-ffi_closure_SYSV_FS!

ffi_closure_SYSV_save_argument
	/* Save the argument passing core registers.  */
	stp	x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
	stp	x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
	stp	x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
	stp	x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]

	/* Load ffi_closure_inner arguments from the ffi_closure that the
	   trampoline left in x17.  */
	ldp	PTR_REG(0), PTR_REG(1), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET]	/* load cif, fn */
	ldr	PTR_REG(2), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET+PTR_SIZE*2]	/* load user_data */

do_closure
	add	x3, sp, #16			/* load context */
	add	x4, sp, #ffi_closure_SYSV_FS	/* load stack */
	add	x5, sp, #16+CALL_CONTEXT_SIZE	/* load rvalue */
	mov	x6, x8				/* load struct_rval */

	bl	ffi_closure_SYSV_inner

	/* Load the return value as directed: inner() returns the
	   AARCH64_RET_* code in w0; index the table, 8 bytes/entry.  */
	adr	x1, ffi_closure_SYSV_return_base
	and	w0, w0, #AARCH64_RET_MASK
	add	x1, x1, x0, lsl #3
	add	x3, sp, #16+CALL_CONTEXT_SIZE	/* x3 = rvalue scratch */
	br	x1

	/* Note that each table entry is 2 insns, and thus 8 bytes.  */
	ALIGN	8
ffi_closure_SYSV_return_base
	b	ffi_closure_SYSV_epilog		/* VOID */
	nop
	ldr	x0, [x3]			/* INT64 */
	b	ffi_closure_SYSV_epilog
	ldp	x0, x1, [x3]			/* INT128 */
	b	ffi_closure_SYSV_epilog
	brk	#1000				/* UNUSED */
	nop
	brk	#1000				/* UNUSED */
	nop
	brk	#1000				/* UNUSED */
	nop
	brk	#1000				/* UNUSED */
	nop
	brk	#1000				/* UNUSED */
	nop
	/* S4 and S3 fall through (nop) into the lower-count entries, so
	   S4 loads s3,s2,s1,s0; likewise for the D and Q groups below.  */
	ldr	s3, [x3, #12]			/* S4 */
	nop
	ldr	s2, [x3, #8]			/* S3 */
	nop
	ldp	s0, s1, [x3]			/* S2 */
	b	ffi_closure_SYSV_epilog
	ldr	s0, [x3]			/* S1 */
	b	ffi_closure_SYSV_epilog
	ldr	d3, [x3, #24]			/* D4 */
	nop
	ldr	d2, [x3, #16]			/* D3 */
	nop
	ldp	d0, d1, [x3]			/* D2 */
	b	ffi_closure_SYSV_epilog
	ldr	d0, [x3]			/* D1 */
	b	ffi_closure_SYSV_epilog
	ldr	q3, [x3, #48]			/* Q4 */
	nop
	ldr	q2, [x3, #32]			/* Q3 */
	nop
	ldp	q0, q1, [x3]			/* Q2 */
	b	ffi_closure_SYSV_epilog
	ldr	q0, [x3]			/* Q1 */
	b	ffi_closure_SYSV_epilog
	/* Small integers: BE() is 0 here (little-endian), so these load
	   from offset 0 of the ffi_arg slot with the right extension.  */
	ldrb	w0, [x3, #BE(7)]		/* UINT8 */
	b	ffi_closure_SYSV_epilog
	brk	#1000				/* reserved */
	nop
	ldrh	w0, [x3, #BE(6)]		/* UINT16 */
	b	ffi_closure_SYSV_epilog
	brk	#1000				/* reserved */
	nop
	ldr	w0, [x3, #BE(4)]		/* UINT32 */
	b	ffi_closure_SYSV_epilog
	brk	#1000				/* reserved */
	nop
	ldrsb	x0, [x3, #BE(7)]		/* SINT8 */
	b	ffi_closure_SYSV_epilog
	brk	#1000				/* reserved */
	nop
	ldrsh	x0, [x3, #BE(6)]		/* SINT16 */
	b	ffi_closure_SYSV_epilog
	brk	#1000				/* reserved */
	nop
	/* Last entry: falls through into the epilog directly.  */
	ldrsw	x0, [x3, #BE(4)]		/* SINT32 */
	nop
						/* reserved */

ffi_closure_SYSV_epilog
	EPILOG_RESTORE_REG_PAIR	x29, x30, #ffi_closure_SYSV_FS!
	EPILOG_RETURN
	NESTED_END ffi_closure_SYSV


#ifdef FFI_GO_CLOSURES
	/* Go-closure entry points: identical register save path, but the
	   closure descriptor arrives in x18 (Go's static chain register)
	   instead of x17, and user_data is the descriptor itself.  */
	NESTED_ENTRY ffi_go_closure_SYSV_V
	PROLOG_SAVE_REG_PAIR	x29, x30, #-ffi_closure_SYSV_FS!

	/* Save the argument passing vector registers.  */
	stp	q0, q1, [sp, #16 + 0]
	stp	q2, q3, [sp, #16 + 32]
	stp	q4, q5, [sp, #16 + 64]
	stp	q6, q7, [sp, #16 + 96]
	b	ffi_go_closure_SYSV_save_argument
	NESTED_END ffi_go_closure_SYSV_V

	NESTED_ENTRY ffi_go_closure_SYSV
	PROLOG_SAVE_REG_PAIR	x29, x30, #-ffi_closure_SYSV_FS!

ffi_go_closure_SYSV_save_argument
	/* Save the argument passing core registers.  */
	stp	x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
	stp	x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
	stp	x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
	stp	x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]

	/* Load ffi_closure_inner arguments.  */
	ldp	PTR_REG(0), PTR_REG(1), [x18, #PTR_SIZE]	/* load cif, fn */
	mov	x2, x18					/* load user_data */
	b	do_closure
	NESTED_END ffi_go_closure_SYSV

#endif /* FFI_GO_CLOSURES */


/* void extend_hfa_type (void *dest, void *src, int h)

   Spread a packed homogeneous floating-point aggregate at src into
   one 16-byte register-sized slot per element at dest.  h is the
   AARCH64_RET_* HFA code; the jump table is indexed by h - S4 at
   16 bytes (4 instructions) per entry.  */

	LEAF_ENTRY extend_hfa_type

	adr	x3, extend_hfa_type_jump_base
	and	w2, w2, #AARCH64_RET_MASK
	sub	x2, x2, #AARCH64_RET_S4
	add	x3, x3, x2, lsl #4	/* 16 bytes per table entry */
	br	x3

	ALIGN	4
extend_hfa_type_jump_base
	/* Each entry loads 1-4 elements into v16-v19, then branches to
	   the shared store tail, which writes one q register per slot.  */
	ldp	s16, s17, [x1]		/* S4 */
	ldp	s18, s19, [x1, #8]
	b	extend_hfa_type_store_4
	nop

	ldp	s16, s17, [x1]		/* S3 */
	ldr	s18, [x1, #8]
	b	extend_hfa_type_store_3
	nop

	ldp	s16, s17, [x1]		/* S2 */
	b	extend_hfa_type_store_2
	nop
	nop

	ldr	s16, [x1]		/* S1 */
	b	extend_hfa_type_store_1
	nop
	nop

	ldp	d16, d17, [x1]		/* D4 */
	ldp	d18, d19, [x1, #16]
	b	extend_hfa_type_store_4
	nop

	ldp	d16, d17, [x1]		/* D3 */
	ldr	d18, [x1, #16]
	b	extend_hfa_type_store_3
	nop

	ldp	d16, d17, [x1]		/* D2 */
	b	extend_hfa_type_store_2
	nop
	nop

	ldr	d16, [x1]		/* D1 */
	b	extend_hfa_type_store_1
	nop
	nop

	ldp	q16, q17, [x1]		/* Q4 */
	ldp	q18, q19, [x1, #16]
	b	extend_hfa_type_store_4
	nop

	ldp	q16, q17, [x1]		/* Q3 */
	ldr	q18, [x1, #16]
	b	extend_hfa_type_store_3
	nop

	ldp	q16, q17, [x1]		/* Q2 */
	b	extend_hfa_type_store_2
	nop
	nop

	ldr	q16, [x1]		/* Q1 */
	b	extend_hfa_type_store_1

	/* Store tail: entered at the label matching the element count and
	   falls through, storing the higher-numbered registers first.  */
extend_hfa_type_store_4
	str	q19, [x0, #48]
extend_hfa_type_store_3
	str	q18, [x0, #32]
extend_hfa_type_store_2
	str	q17, [x0, #16]
extend_hfa_type_store_1
	str	q16, [x0]
	ret

	LEAF_END extend_hfa_type


/* void compress_hfa_type (void *dest, void *reg, int h)

   Inverse of extend_hfa_type: gather the elements from one 16-byte
   register slot each at reg into a packed HFA at dest.  Same jump
   table scheme: indexed by h - S4, 16 bytes per entry.  */

	LEAF_ENTRY compress_hfa_type

	adr	x3, compress_hfa_type_jump_base
	and	w2, w2, #AARCH64_RET_MASK
	sub	x2, x2, #AARCH64_RET_S4
	add	x3, x3, x2, lsl #4	/* 16 bytes per table entry */
	br	x3

	ALIGN	4
compress_hfa_type_jump_base
	/* stN single-lane stores pack element 0 of each source register
	   contiguously at dest.  */
	ldp	q16, q17, [x1]		/* S4 */
	ldp	q18, q19, [x1, #32]
	st4	{ v16.s, v17.s, v18.s, v19.s }[0], [x0]
	ret

	ldp	q16, q17, [x1]		/* S3 */
	ldr	q18, [x1, #32]
	st3	{ v16.s, v17.s, v18.s }[0], [x0]
	ret

	ldp	q16, q17, [x1]		/* S2 */
	st2	{ v16.s, v17.s }[0], [x0]
	ret
	nop

	ldr	q16, [x1]		/* S1 */
	st1	{ v16.s }[0], [x0]
	ret
	nop

	ldp	q16, q17, [x1]		/* D4 */
	ldp	q18, q19, [x1, #32]
	st4	{ v16.d, v17.d, v18.d, v19.d }[0], [x0]
	ret

	ldp	q16, q17, [x1]		/* D3 */
	ldr	q18, [x1, #32]
	st3	{ v16.d, v17.d, v18.d }[0], [x0]
	ret

	ldp	q16, q17, [x1]		/* D2 */
	st2	{ v16.d, v17.d }[0], [x0]
	ret
	nop

	ldr	q16, [x1]		/* D1 */
	st1	{ v16.d }[0], [x0]
	ret
	nop

	/* Q elements are already full registers: plain stores suffice,
	   with Q4/Q3 sharing a store tail outside the table.  */
	ldp	q16, q17, [x1]		/* Q4 */
	ldp	q18, q19, [x1, #32]
	b	compress_hfa_type_store_q4
	nop

	ldp	q16, q17, [x1]		/* Q3 */
	ldr	q18, [x1, #32]
	b	compress_hfa_type_store_q3
	nop

	ldp	q16, q17, [x1]		/* Q2 */
	stp	q16, q17, [x0]
	ret
	nop

	ldr	q16, [x1]		/* Q1 */
	str	q16, [x0]
	ret

compress_hfa_type_store_q4
	str	q19, [x0, #48]
compress_hfa_type_store_q3
	str	q18, [x0, #32]
	stp	q16, q17, [x0]
	ret

	LEAF_END compress_hfa_type

	END