/* -----------------------------------------------------------------------
   tile.S - Copyright (c) 2011 Tilera Corp.

   Tilera TILEPro and TILE-Gx Foreign Function Interface

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   ``Software''), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
   DEALINGS IN THE SOFTWARE.
   ----------------------------------------------------------------------- */

#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>

/* Number of bytes in a register. */
#define REG_SIZE FFI_SIZEOF_ARG

/* Number of bytes in stack linkage area for backtracing.

   A note about the ABI: on entry to a procedure, sp points to a stack
   slot where it must spill the return address if it's not a leaf.
   REG_SIZE bytes beyond that is a slot owned by the caller which
   contains the sp value that the caller had when it was originally
   entered (i.e. the caller's frame pointer). */
#define LINKAGE_SIZE (2 * REG_SIZE)

/* The first 10 registers are used to pass arguments and return values. */
#define NUM_ARG_REGS 10

/* The register-sized store and load, and the branch used to loop while
   a count is still positive, are spelled differently on TILE-Gx and
   TILEPro.  These aliases let the code below be shared.

   Operand order as used throughout this file:
     SW addr, value     stores VALUE at the address held in ADDR
     LW dest, addr      loads DEST from the address held in ADDR  */
#ifdef __tilegx__
#define SW st
#define LW ld
#define BGZT bgtzt
#else
#define SW sw
#define LW lw
#define BGZT bgzt
#endif


/* void ffi_call_tile (int_reg_t reg_args[NUM_ARG_REGS],
                       const int_reg_t *stack_args,
                       unsigned long stack_args_bytes,
                       void (*fnaddr)(void));

   On entry, REG_ARGS contains the outgoing register values,
   and STACK_ARGS contains STACK_ARGS_BYTES of additional values
   to be passed on the stack.  If STACK_ARGS_BYTES is zero (or
   negative), then STACK_ARGS is ignored.  The stack arguments are
   copied REG_SIZE bytes at a time.

   When the invoked function returns, the values of r0-r9 are
   blindly stored back into REG_ARGS for the caller to examine.

   Frame layout while FNADDR runs (r52 holds our incoming sp):

     r52 + 0               saved lr
     r52 - REG_SIZE        saved incoming r52
     r52 - 2 * REG_SIZE    saved REG_ARGS pointer
     sp + LINKAGE_SIZE     start of the copied stack arguments
     sp + REG_SIZE         linkage: our incoming sp
     sp + 0                linkage: slot for the callee's return address

   Registers r10-r16 are used as scratch (see the #defines below).

   Several bundles below read a register that the same bundle also
   overwrites (e.g. the spill of the incoming r52 next to
   "move r52, sp"); the code relies on every instruction in a { }
   bundle seeing the register values from before the bundle.  */

        .section .text.ffi_call_tile, "ax", @progbits
        .align  8
        .globl  ffi_call_tile
        FFI_HIDDEN(ffi_call_tile)
ffi_call_tile:

/* Incoming arguments. */
#define REG_ARGS                r0
#define INCOMING_STACK_ARGS     r1
#define STACK_ARG_BYTES         r2
#define ORIG_FNADDR             r3

/* Temporary values. */
#define FRAME_SIZE              r10
#define TMP                     r11
#define TMP2                    r12
#define OUTGOING_STACK_ARGS     r13
#define REG_ADDR_PTR            r14
#define RETURN_REG_ADDR         r15
#define FNADDR                  r16

        .cfi_startproc
        {
        /* Save return address. */
        SW      sp, lr
        .cfi_offset lr, 0
        /* Prepare to spill incoming r52. */
        addi    TMP, sp, -REG_SIZE
        /* Increase frame size to have room to spill r52 and REG_ARGS.
           The +7 is to round up mod 8. */
        addi    FRAME_SIZE, STACK_ARG_BYTES, \
                REG_SIZE + REG_SIZE + LINKAGE_SIZE + 7
        }
        {
        /* Round stack frame size to a multiple of 8 to satisfy ABI. */
        andi    FRAME_SIZE, FRAME_SIZE, -8
        /* Compute where to spill REG_ARGS value. */
        addi    TMP2, sp, -(REG_SIZE * 2)
        }
        {
        /* Spill incoming r52. */
        SW      TMP, r52
        .cfi_offset r52, -REG_SIZE
        /* Set up our frame pointer. */
        move    r52, sp
        .cfi_def_cfa_register r52
        /* Push stack frame. */
        sub     sp, sp, FRAME_SIZE
        }
        {
        /* Prepare to set up stack linkage. */
        addi    TMP, sp, REG_SIZE
        /* Prepare to memcpy stack args. */
        addi    OUTGOING_STACK_ARGS, sp, LINKAGE_SIZE
        /* Save REG_ARGS which we will need after we call the subroutine. */
        SW      TMP2, REG_ARGS
        }
        {
        /* Set up linkage info to hold incoming stack pointer. */
        SW      TMP, r52
        }
        {
        /* Skip stack args memcpy if we don't have any stack args (common). */
        blezt   STACK_ARG_BYTES, .Ldone_stack_args_memcpy
        }

        /* Copy loop: one REG_SIZE-byte value per iteration, counting
           STACK_ARG_BYTES down until it is no longer positive. */
.Lmemcpy_stack_args:
        {
        /* Load incoming argument from stack_args. */
        LW      TMP, INCOMING_STACK_ARGS
        addi    INCOMING_STACK_ARGS, INCOMING_STACK_ARGS, REG_SIZE
        }
        {
        /* Store stack argument into outgoing stack argument area. */
        SW      OUTGOING_STACK_ARGS, TMP
        addi    OUTGOING_STACK_ARGS, OUTGOING_STACK_ARGS, REG_SIZE
        addi    STACK_ARG_BYTES, STACK_ARG_BYTES, -REG_SIZE
        }
        {
        BGZT    STACK_ARG_BYTES, .Lmemcpy_stack_args
        }
.Ldone_stack_args_memcpy:

        {
        /* Copy aside ORIG_FNADDR so we can overwrite its register. */
        move    FNADDR, ORIG_FNADDR
        /* Prepare to load argument registers. */
        addi    REG_ADDR_PTR, r0, REG_SIZE
        /* Load outgoing r0. */
        LW      r0, r0
        }

        /* Load up argument registers from the REG_ARGS array.
           LOAD_REG loads REG from the address in PTR and advances PTR
           by REG_SIZE.  It is also used by ffi_closure_tile below. */
#define LOAD_REG(REG, PTR) \
        { \
        LW      REG, PTR ; \
        addi    PTR, PTR, REG_SIZE \
        }

        LOAD_REG(r1, REG_ADDR_PTR)
        LOAD_REG(r2, REG_ADDR_PTR)
        LOAD_REG(r3, REG_ADDR_PTR)
        LOAD_REG(r4, REG_ADDR_PTR)
        LOAD_REG(r5, REG_ADDR_PTR)
        LOAD_REG(r6, REG_ADDR_PTR)
        LOAD_REG(r7, REG_ADDR_PTR)
        LOAD_REG(r8, REG_ADDR_PTR)
        LOAD_REG(r9, REG_ADDR_PTR)

        {
        /* Call the subroutine. */
        jalr    FNADDR
        }

        {
        /* Restore original lr. */
        LW      lr, r52
        /* Prepare to recover ARGS, which we spilled earlier. */
        addi    TMP, r52, -(2 * REG_SIZE)
        }
        {
        /* Restore ARGS, so we can fill it in with the return regs r0-r9. */
        LW      RETURN_REG_ADDR, TMP
        /* Prepare to restore original r52. */
        addi    TMP, r52, -REG_SIZE
        }

        {
        /* Pop stack frame. */
        move    sp, r52
        /* Restore original r52. */
        LW      r52, TMP
        }

        /* STORE_REG stores REG at the address in PTR and advances PTR
           by REG_SIZE.  It is also used by ffi_closure_tile below. */
#define STORE_REG(REG, PTR) \
        { \
        SW      PTR, REG ; \
        addi    PTR, PTR, REG_SIZE \
        }

        /* Return all register values by reference. */
        STORE_REG(r0, RETURN_REG_ADDR)
        STORE_REG(r1, RETURN_REG_ADDR)
        STORE_REG(r2, RETURN_REG_ADDR)
        STORE_REG(r3, RETURN_REG_ADDR)
        STORE_REG(r4, RETURN_REG_ADDR)
        STORE_REG(r5, RETURN_REG_ADDR)
        STORE_REG(r6, RETURN_REG_ADDR)
        STORE_REG(r7, RETURN_REG_ADDR)
        STORE_REG(r8, RETURN_REG_ADDR)
        STORE_REG(r9, RETURN_REG_ADDR)

        {
        jrp     lr
        }

        .cfi_endproc
        .size ffi_call_tile, .-ffi_call_tile

/* ffi_closure_tile(...)

   Entered from a closure trampoline (see ffi_template_tramp_tile
   at the end of this file).

   TILEPro: on entry, lr points to the closure plus 8 bytes, and r10
   contains the actual return address.

   TILE-Gx: on entry, r11 points to the closure and lr contains the
   actual return address.

   This function simply dumps all register parameters into a stack array
   and passes the closure, the registers array, and the stack arguments
   to C code (ffi_closure_tile_inner) that does all of the actual
   closure processing.  On return from the C code, r0-r9 are reloaded
   from the second NUM_ARG_REGS-sized array in the frame, which
   immediately follows the spilled incoming registers. */

        .section .text.ffi_closure_tile, "ax", @progbits
        .align  8
        .globl  ffi_closure_tile
        FFI_HIDDEN(ffi_closure_tile)

        .cfi_startproc
/* Room for two arrays of NUM_ARG_REGS registers (the spilled incoming
   registers, followed by the array the return registers are reloaded
   from), plus frame linkage, rounded up to a multiple of 8. */
#define CLOSURE_FRAME_SIZE (((NUM_ARG_REGS * REG_SIZE * 2 + LINKAGE_SIZE) + 7) & -8)
ffi_closure_tile:
        {
#ifdef __tilegx__
        /* Save return address. */
        st      sp, lr
        .cfi_offset lr, 0
#else
        /* Save return address (in r10 due to closure stub wrapper). */
        SW      sp, r10
        .cfi_return_column r10
        .cfi_offset r10, 0
#endif
        /* Compute address for stack frame linkage. */
        addli   r10, sp, -(CLOSURE_FRAME_SIZE - REG_SIZE)
        }
        {
        /* Save incoming stack pointer in linkage area. */
        SW      r10, sp
        .cfi_offset sp, -(CLOSURE_FRAME_SIZE - REG_SIZE)
        /* Push a new stack frame. */
        addli   sp, sp, -CLOSURE_FRAME_SIZE
        .cfi_adjust_cfa_offset CLOSURE_FRAME_SIZE
        }

        {
        /* Create pointer to where to start spilling registers. */
        addi    r10, sp, LINKAGE_SIZE
        }

        /* Spill all the incoming registers. */
        STORE_REG(r0, r10)
        STORE_REG(r1, r10)
        STORE_REG(r2, r10)
        STORE_REG(r3, r10)
        STORE_REG(r4, r10)
        STORE_REG(r5, r10)
        STORE_REG(r6, r10)
        STORE_REG(r7, r10)
        STORE_REG(r8, r10)
        {
        /* Save r9. */
        SW      r10, r9
#ifdef __tilegx__
        /* Pointer to closure is passed in r11. */
        move    r0, r11
#else
        /* Compute pointer to the closure object. Because the closure
           starts with a "jal ffi_closure_tile", we can just take the
           value of lr (a phony return address pointing into the closure)
           and subtract 8. */
        addi    r0, lr, -8
#endif
        /* Compute a pointer to the register arguments we just spilled. */
        addi    r1, sp, LINKAGE_SIZE
        }
        {
        /* Compute a pointer to the extra stack arguments (if any). */
        addli   r2, sp, CLOSURE_FRAME_SIZE + LINKAGE_SIZE
        /* Call C code to deal with all of the grotty details. */
        jal     ffi_closure_tile_inner
        }
        {
        /* Point at the slot (our incoming sp) where the return address
           was saved on entry. */
        addli   r10, sp, CLOSURE_FRAME_SIZE
        }
        {
        /* Restore the return address. */
        LW      lr, r10
        /* Compute pointer to registers array. */
        addli   r10, sp, LINKAGE_SIZE + (NUM_ARG_REGS * REG_SIZE)
        }
        /* Return all the register values, which C code may have set. */
        LOAD_REG(r0, r10)
        LOAD_REG(r1, r10)
        LOAD_REG(r2, r10)
        LOAD_REG(r3, r10)
        LOAD_REG(r4, r10)
        LOAD_REG(r5, r10)
        LOAD_REG(r6, r10)
        LOAD_REG(r7, r10)
        LOAD_REG(r8, r10)
        LOAD_REG(r9, r10)
        {
        /* Pop the frame. */
        addli   sp, sp, CLOSURE_FRAME_SIZE
        jrp     lr
        }

        .cfi_endproc
        .size   ffi_closure_tile, . - ffi_closure_tile


/* What follows are code template instructions that get copied to the
   closure trampoline by ffi_prep_closure_loc.  The zeroed operands
   get replaced by their proper values at runtime. */

        .section .text.ffi_template_tramp_tile, "ax", @progbits
        .align  8
        .globl  ffi_template_tramp_tile
        FFI_HIDDEN(ffi_template_tramp_tile)
ffi_template_tramp_tile:
#ifdef __tilegx__
        {
          moveli r11, 0 /* backpatched to address of containing closure. */
          moveli r10, 0 /* backpatched to ffi_closure_tile. */
        }
        /* Note: the following bundle gets generated multiple times
           depending on the pointer value (esp. useful for -m32 mode). */
        { shl16insli r11, r11, 0 ; shl16insli r10, r10, 0 }
        { info 2+8 /* for backtracer: -> pc in lr, frame size 0 */ ; jr r10 }
#else
        /* 'jal .' yields a PC-relative offset of zero so we can OR in the
           right offset at runtime. */
        { move r10, lr ; jal . /* ffi_closure_tile */ }
#endif

        .size   ffi_template_tramp_tile, . - ffi_template_tramp_tile