/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * JNI method invocation.  This is used to call a C/C++ JNI method.  The
 * argument list has to be pushed onto the native stack according to
 * local calling conventions.
 *
 * This version supports the "new" ARM EABI.
 */

#include <machine/cpu-features.h>

#ifdef __ARM_EABI__

/*
 * Lines prefixed with DBG are assembled only when EXTENDED_EABI_DEBUG is
 * defined; otherwise DBG expands to the comment character and the whole
 * line is ignored by the assembler.
 */
#ifdef EXTENDED_EABI_DEBUG
# define DBG
#else
# define DBG @
#endif


/*
Function prototype:

void dvmPlatformInvoke(void* pEnv, ClassObject* clazz, int argInfo, int argc,
    const u4* argv, const char* signature, void* func, JValue* pReturn)

The method we are calling has the form:

  return_type func(JNIEnv* pEnv, ClassObject* clazz, ...)
    -or-
  return_type func(JNIEnv* pEnv, Object* this, ...)

We receive a collection of 32-bit values which correspond to arguments from
the interpreter (e.g. float occupies one, double occupies two).  It's up to
us to convert these into local calling conventions.
*/

/*
ARM EABI notes:

r0-r3 hold first 4 args to a method
r9 is given special treatment in some situations, but not for us
r10 (sl) seems to be generally available
r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
r12 (ip) is scratch -- not preserved across method calls
r13 (sp) should be managed carefully in case a signal arrives
r14 (lr) must be preserved
r15 (pc) can be tinkered with directly

r0 holds returns of <= 4 bytes
r0-r1 hold returns of 8 bytes, low word in r0

Callee must save/restore r4+ (except r12) if it modifies them.

Stack is "full descending".  Only the arguments that don't fit in the first 4
registers are placed on the stack.  "sp" points at the first stacked argument
(i.e. the 5th arg).

VFP: single-precision results in s0, double-precision results in d0.

In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
64-bit quantities (long long, double) must be 64-bit aligned.  This means
we have to scan the method signature, identify arguments that must be
padded, and fix them up appropriately.
*/

    .text
    .align  2
    .global dvmPlatformInvoke
    .type   dvmPlatformInvoke, %function

/*
 * On entry:
 *   r0  JNIEnv (can be left alone)
 *   r1  clazz (NULL for virtual method calls, non-NULL for static)
 *   r2  arg info
 *   r3  argc (number of 32-bit values in argv)
 *   [sp]     argv
 *   [sp,#4]  short signature
 *   [sp,#8]  func
 *   [sp,#12] pReturn
 *
 * For a virtual method call, the "this" reference is in argv[0].
 *
 * argInfo (32-bit int) layout:
 *   SRRRLLLL FFFFFFFF FFFFFFFF FFFFFFFF
 *
 *   S - if set, do things the hard way (scan the signature)
 *   R - return type enumeration, really only important for hardware FP
 *   L - number of double-words of storage required on stack (0-30 words)
 *   F - pad flag -- if set, write a pad word to the stack
 *
 * With this arrangement we can efficiently push up to 24 words of arguments
 * onto the stack.  Anything requiring more than that -- which should happen
 * rarely to never -- can do the slow signature scan.
 *
 * (We could pack the Fs more efficiently -- we know we never push two pads
 * in a row, and the first word can never be a pad -- but there's really
 * no need for it.)
 *
 * On exit, r0/r1 as returned by "func" have been stored to *pReturn unless
 * the return type field extracted into r5 is zero (see the notes at the
 * store and at .Lno_arg_info).  r4-r9, sp and lr are restored.
 *
 * TODO: could reduce register-saving overhead for "fast" case, since we
 * don't use a couple of registers.  Another thought is to rearrange the
 * arguments such that r0/r1 get passed in on the stack, allowing us to
 * use r0/r1 freely here and then load them with a single ldm.  Might be
 * faster than saving/restoring other registers so that we can leave r0/r1
 * undisturbed.
 *
 * NOTE: if the called function has more than 4 words of arguments, gdb
 * will not be able to unwind the stack past this method.  The only way
 * around this is to convince gdb to respect an explicit frame pointer.
 */
dvmPlatformInvoke:
    .fnstart
    @ Save regs.  Same style as gcc with "-fomit-frame-pointer" -- we don't
    @ disturb "fp" in case somebody else wants it.  Copy "sp" to r4 and use
    @ that to access local vars.
    @
    @ On entry to a function, "sp" must be 64-bit aligned.  This means
    @ we have to adjust sp manually if we push an odd number of regs here
    @ (both here and when exiting).  Easier to just push an even number
    @ of registers.
    mov     ip, sp                      @ ip<- original stack pointer
    .save {r4, r5, r6, r7, r8, r9, ip, lr}
    stmfd   sp!, {r4, r5, r6, r7, r8, r9, ip, lr}

    mov     r4, ip                      @ r4<- original stack pointer

    @ Ensure 64-bit alignment.  EABI guarantees sp is aligned on entry, make
    @ sure we're aligned properly now.
DBG tst     sp, #4                      @ 64-bit aligned?
DBG bne     dvmAbort

    cmp     r1, #0                      @ Is this a static method?
    ldr     r9, [r4]                    @ r9<- argv

    @ Not static: set r1 to *argv++ ("this"), and set argc--.
    @
    @ Note the "this" pointer is not included in the method signature.
    ldreq   r1, [r9], #4
    subeq   r3, r3, #1

    @ Do we have arg padding flags in "argInfo"? (just need to check hi bit)
    teqs    r2, #0
    bmi     .Lno_arg_info

    /*
     * "Fast" path.
     *
     * Make room on the stack for the arguments and copy them over,
     * inserting pad words when appropriate.
     *
     * Currently:
     *   r0  don't touch
     *   r1  don't touch
     *   r2  arg info
     *   r3  argc
     *   r4  original stack pointer
     *   r5-r8 (available)
     *   r9  argv
     */
.Lhave_arg_info:
    @ Expand the stack by the specified amount.  We want to extract the
    @ count of double-words from r2, multiply it by 8, and subtract that
    @ from the stack pointer.
    and     ip, r2, #0x0f000000         @ ip<- double-words required
    mov     r5, r2, lsr #28             @ r5<- return type
    sub     sp, sp, ip, lsr #21         @ shift right 24, then left 3
    mov     r8, sp                      @ r8<- sp  (arg copy dest)

    @ Stick argv in r7 and advance it past the argv values that will be
    @ held in r2-r3.  It's possible r3 will hold a pad, so check the
    @ bit in r2.  We do this by ignoring the first bit (which would
    @ indicate a pad in r2) and shifting the second into the carry flag.
    @ If the carry is set, r3 will hold a pad, so we adjust argv less.
    @
    @ (This is harmless if argc==0)
    mov     r7, r9
    movs    r2, r2, lsr #2
    addcc   r7, r7, #8                  @ skip past 2 words, for r2 and r3
    subcc   r3, r3, #2
    addcs   r7, r7, #4                  @ skip past 1 word, for r2
    subcs   r3, r3, #1

.Lfast_copy_loop:
    @ if (--argc < 0) goto copy_done
    subs    r3, r3, #1
    bmi     .Lcopy_done                 @ NOTE: expects original argv in r9

.Lfast_copy_loop2:
    @ Get pad flag into carry bit.  If it's set, we don't pull a value
    @ out of argv.
    movs    r2, r2, lsr #1

    ldrcc   ip, [r7], #4                @ ip = *r7++ (pull from argv)
    strcc   ip, [r8], #4                @ *r8++ = ip (write to stack)
    bcc     .Lfast_copy_loop

DBG movcs   ip, #-3                     @ DEBUG DEBUG - make pad word obvious
DBG strcs   ip, [r8]                    @ DEBUG DEBUG
    add     r8, r8, #4                  @ if pad, just advance r8 without store
    b       .Lfast_copy_loop2           @ don't adjust argc after writing pad



.Lcopy_done:
    /*
     * Currently:
     *  r0-r3  args (JNIEnv*, thisOrClass, arg0, arg1)
     *  r4     original saved sp
     *  r5     return type (enum DalvikJniReturnType)
     *  r9     original argv
     *
     * The stack copy is complete.  Grab the first two words off of argv
     * and tuck them into r2/r3.  If the first arg is 32-bit and the second
     * arg is 64-bit, then r3 "holds" a pad word and the load is unnecessary
     * but harmless.
     *
     * If there are 0 or 1 arg words in argv, we will be loading uninitialized
     * data into the registers, but since nothing tries to use it it's also
     * harmless (assuming argv[0] and argv[1] point to valid memory, which
     * is a reasonable assumption for Dalvik's interpreted stacks).
     *
     */
    ldmia   r9, {r2-r3}                 @ r2/r3<- argv[0]/argv[1]

    @ call the method
    ldr     ip, [r4, #8]                @ func
    blx     ip

    @ We're back, result is in r0 or (for long/double) r0-r1.
    @
    @ In theory, we need to use the "return type" arg to figure out what
    @ we have and how to return it.  However, unless we have an FPU,
    @ all we need to do is copy r0-r1 into the JValue union.
    @
    @ Thought: could redefine DalvikJniReturnType such that single-word
    @ and double-word values occupy different ranges; simple comparison
    @ allows us to choose between str and stm.  Probably not worthwhile.
    @
    cmp     r5, #0                      @ DALVIK_JNI_RETURN_VOID?
    ldrne   ip, [r4, #12]               @ pReturn
    stmneia ip, {r0-r1}                 @ pReturn->j <- r0/r1

    @ Restore the registers we saved and return (restores lr into pc, and
    @ the initial stack pointer into sp).
#ifdef __ARM_HAVE_PC_INTERWORK
    ldmdb   r4, {r4, r5, r6, r7, r8, r9, sp, pc}
#else
    ldmdb   r4, {r4, r5, r6, r7, r8, r9, sp, lr}
    bx      lr
#endif
    .fnend



    /*
     * "Slow" path.
     * Walk through the argument list, counting up the number of 32-bit words
     * required to contain it.  Then walk through it a second time, copying
     * values out to the stack.  (We could pre-compute the size to save
     * ourselves a trip, but we'd have to store that somewhere -- this is
     * sufficiently unlikely that it's not worthwhile.)
     *
     * Try not to make any assumptions about the number of args -- I think
     * the class file format allows up to 64K words (need to verify that).
     *
     * Currently:
     *   r0  don't touch
     *   r1  don't touch
     *   r2  (available)
     *   r3  argc
     *   r4  original stack pointer
     *   r5-r8 (available)
     *   r9  argv
     *
     * NOTE(review): this path is only entered with the S bit (bit 31) of
     * argInfo set, and "lsr #28" keeps that bit, so r5 is >= 8 here.  The
     * "cmp r5, #0" VOID test after the call therefore never matches on
     * this path and r0/r1 are always stored to pReturn.  Presumably
     * pReturn is always writable -- confirm against the caller.
     *
     * NOTE(review): both signature walks below start at the first
     * character of the string at [r4, #4].  If the caller passes a shorty
     * whose first character is the return type, that character is treated
     * as an argument here -- confirm against the caller.
     */
.Lno_arg_info:
    mov     r5, r2, lsr #28             @ r5<- return type
    ldr     r6, [r4, #4]                @ r6<- short signature
    mov     r2, #0                      @ r2<- word count, init to zero

.Lcount_loop:
    ldrb    ip, [r6], #1                @ ip<- *signature++
    cmp     ip, #0                      @ end?
    beq     .Lcount_done                @ all done, bail
    add     r2, r2, #1                  @ count++
    cmp     ip, #'D'                    @ look for 'D' or 'J', which are 64-bit
    cmpne   ip, #'J'
    bne     .Lcount_loop

    @ 64-bit value, insert padding if we're not aligned.  r2 has already
    @ been bumped once for this character, so (r2 - 1) is the word index
    @ at which the value would start.  An even start index is 64-bit
    @ aligned; an odd one needs a pad word in front of the pair.
    tst     r2, #1                      @ odd after initial incr?
    addne   r2, #1                      @ yes (aligned), add 1 more to cover
                                        @  64 bits
    addeq   r2, #2                      @ no (unaligned), treat prev as pad,
                                        @  incr 2 now
    b       .Lcount_loop
.Lcount_done:

    @ We have the padded-out word count in r2.  We subtract 2 from it
    @ because we don't push the first two arg words on the stack (they're
    @ destined for r2/r3).  Pushing them on and popping them off would be
    @ simpler but slower.
    subs    r2, r2, #2                  @ subtract 2 (for contents of r2/r3)
    movmis  r2, #0                      @ if negative, peg at zero, set Z-flag
    beq     .Lcopy_done                 @ zero args, skip stack copy

DBG tst     sp, #7                      @ DEBUG - make sure sp is aligned now
DBG bne     dvmAbort                    @ DEBUG

    @ Set up to copy from r7 to r8.  We copy from the second arg to the
    @ last arg, which means reading and writing to ascending addresses.
    sub     sp, sp, r2, asl #2          @ sp<- sp - r2*4
    bic     sp, #4                      @ subtract another 4 if needed to
                                        @  keep sp 64-bit aligned
    mov     r7, r9                      @ r7<- argv
    mov     r8, sp                      @ r8<- sp

    @ We need to copy words from [r7] to [r8].  We walk forward through
    @ the signature again, "copying" pad words when appropriate, storing
    @ upward into the stack.
    ldr     r6, [r4, #4]                @ r6<- signature
    add     r7, r7, #8                  @ r7<- r7+8 (assume argv 0/1 in r2/r3)

    @ Eat first arg or two, for the stuff that goes into r2/r3.
    ldrb    ip, [r6], #1                @ ip<- *signature++
    cmp     ip, #'D'
    cmpne   ip, #'J'
    beq     .Lstack_copy_loop           @ 64-bit arg fills r2+r3

    @ First arg was 32-bit, check the next.  The character just loaded is
    @ in ip; r6 is the signature pointer and must not be the value tested.
    @ If the second arg is 64-bit, r3 is a pad and the whole value goes on
    @ the stack, so only argv[0] has been consumed by registers.
    ldrb    ip, [r6], #1                @ ip<- *signature++
    cmp     ip, #'D'
    cmpne   ip, #'J'
    subeq   r7, #4                      @ r7<- r7-4 (take it back - pad word)
    beq     .Lstack_copy_loop2          @ start with char we already have

    @ Two 32-bit args, fall through and start with next arg

.Lstack_copy_loop:
    ldrb    ip, [r6], #1                @ ip<- *signature++
.Lstack_copy_loop2:
    cmp     ip, #0                      @ end of shorty?
    beq     .Lcopy_done                 @ yes

    cmp     ip, #'D'
    cmpne   ip, #'J'
    beq     .Lcopy64

    @ Copy a 32-bit value.  [r8] is initially at the end of the stack.  We
    @ use "full descending" stacks, so we store into [r8] and incr as we
    @ move toward the end of the arg list.
.Lcopy32:
    ldr     ip, [r7], #4
    str     ip, [r8], #4
    b       .Lstack_copy_loop

.Lcopy64:
    @ Copy a 64-bit value.  If necessary, leave a hole in the stack to
    @ ensure alignment.  We know the [r8] output area is 64-bit aligned,
    @ so we can just mask the address.
    @
    @ r2 is used as scratch for the second word; it is reloaded from argv
    @ at .Lcopy_done before the call.
    add     r8, r8, #7                  @ r8<- (r8+7) & ~7
    ldr     ip, [r7], #4
    bic     r8, r8, #7
    ldr     r2, [r7], #4
    str     ip, [r8], #4
    str     r2, [r8], #4
    b       .Lstack_copy_loop



#if 0

/*
 * Spit out a "we were here", preserving all registers.  (The attempt
 * to save ip won't work, but we need to save an even number of
 * registers for EABI 64-bit stack alignment.)
 */
    .macro SQUEAK num
common_squeak\num:
    stmfd   sp!, {r0, r1, r2, r3, ip, lr}
    ldr     r0, strSqueak
    mov     r1, #\num
    bl      printf
#ifdef __ARM_HAVE_PC_INTERWORK
    ldmfd   sp!, {r0, r1, r2, r3, ip, pc}
#else
    ldmfd   sp!, {r0, r1, r2, r3, ip, lr}
    bx      lr
#endif
    .endm

    SQUEAK  0
    SQUEAK  1
    SQUEAK  2
    SQUEAK  3
    SQUEAK  4
    SQUEAK  5

strSqueak:
    .word   .LstrSqueak
.LstrSqueak:
    .asciz  "<%d>"

    .align  2

#endif

#endif /*__ARM_EABI__*/