/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_x86_64.S"
#include "interpreter/cfi_asm_support.h"

#include "arch/quick_alloc_entrypoints.S"

MACRO0(ASSERT_USE_READ_BARRIER)
#if !defined(USE_READ_BARRIER)
    int3
    int3
#endif
END_MACRO

MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
    // Create space for ART FP callee-saved registers.
    subq MACRO_LITERAL(4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(4 * 8)
    movq %xmm12, 0(%rsp)
    movq %xmm13, 8(%rsp)
    movq %xmm14, 16(%rsp)
    movq %xmm15, 24(%rsp)
END_MACRO

MACRO0(RESTORE_FP_CALLEE_SAVE_FRAME)
    // Restore ART FP callee-saved registers.
    movq 0(%rsp), %xmm12
    movq 8(%rsp), %xmm13
    movq 16(%rsp), %xmm14
    movq 24(%rsp), %xmm15
    addq MACRO_LITERAL(4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(- 4 * 8)
END_MACRO

// For x86_64, the CFA is rsp+8, the address above the pushed return address on the stack.

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
     */
MACRO0(SETUP_SAVE_ALL_CALLEE_SAVES_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee save registers to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    // Create space for FPR args, plus space for ArtMethod*.
    subq MACRO_LITERAL(4 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(4 * 8 + 8)
    // Save FPRs.
    movq %xmm12, 8(%rsp)
    movq %xmm13, 16(%rsp)
    movq %xmm14, 24(%rsp)
    movq %xmm15, 32(%rsp)
    // R10 := ArtMethod* for save all callee save frame method.
    movq RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 6 * 8 + 4 * 8 + 8 + 8)
#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
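    /*
     * Illustrative sketch only (derived from the pushes and stores above): the
     * kSaveAllCalleeSaves frame as seen from the final RSP, lowest address first.
     *
     *   +0:  ArtMethod*  (the kSaveAllCalleeSaves runtime method)
     *   +8:  xmm12   +16: xmm13   +24: xmm14   +32: xmm15
     *   +40: rbx     +48: rbp     +56: r12     +64: r13   +72: r14   +80: r15
     *   +88: return address (pushed by the caller's call instruction)
     *
     * Hence the compile-time check in the macro:
     * FRAME_SIZE_SAVE_ALL_CALLEE_SAVES == 6 * 8 (GPRs) + 4 * 8 (FPRs) + 8 (method) + 8 (RA).
     */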
    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly)
     */
MACRO0(SETUP_SAVE_REFS_ONLY_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee save registers to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    // Create space for FPR args, plus space for ArtMethod*.
    subq LITERAL(8 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(8 + 4 * 8)
    // Save FPRs.
    movq %xmm12, 8(%rsp)
    movq %xmm13, 16(%rsp)
    movq %xmm14, 24(%rsp)
    movq %xmm15, 32(%rsp)
    // R10 := ArtMethod* for refs only callee save frame method.
    movq RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_REFS_ONLY != 6 * 8 + 4 * 8 + 8 + 8)
#error "FRAME_SIZE_SAVE_REFS_ONLY(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO

MACRO0(RESTORE_SAVE_REFS_ONLY_FRAME)
    movq 8(%rsp), %xmm12
    movq 16(%rsp), %xmm13
    movq 24(%rsp), %xmm14
    movq 32(%rsp), %xmm15
    addq LITERAL(8 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8 - 4*8)
    // TODO: optimize by not restoring callee-saves restored by the ABI.
    POP rbx
    POP rbp
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs)
     */
MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots for ArtMethod*.
    subq MACRO_LITERAL(16 + 12 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(16 + 12 * 8)
    // R10 := ArtMethod* for ref and args callee save frame method.
    movq RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(%r10), %r10
    // Save FPRs.
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
    movq %xmm2, 32(%rsp)
    movq %xmm3, 40(%rsp)
    movq %xmm4, 48(%rsp)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
    movq %xmm12, 80(%rsp)
    movq %xmm13, 88(%rsp)
    movq %xmm14, 96(%rsp)
    movq %xmm15, 104(%rsp)
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 11 * 8 + 12 * 8 + 16 + 8)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO

MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI)
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots for ArtMethod*.
    subq LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
    // Save FPRs.
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
    movq %xmm2, 32(%rsp)
    movq %xmm3, 40(%rsp)
    movq %xmm4, 48(%rsp)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
    movq %xmm12, 80(%rsp)
    movq %xmm13, 88(%rsp)
    movq %xmm14, 96(%rsp)
    movq %xmm15, 104(%rsp)
    // Store ArtMethod* to bottom of stack.
    movq %rdi, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
END_MACRO

MACRO0(RESTORE_SAVE_REFS_AND_ARGS_FRAME)
    // Restore FPRs.
    movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    addq MACRO_LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8))
    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
    POP rcx
    POP rdx
    POP rbx
    POP rbp
    POP rsi
    POP r8
    POP r9
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when R14 and R15 are already saved.
     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
#if defined(__APPLE__)
    int3
    int3
#else
    // Save core registers from highest to lowest to agree with core spills bitmap.
    // R14 and R15, or at least placeholders for them, are already on the stack.
    PUSH r13
    PUSH r12
    PUSH r11
    PUSH r10
    PUSH r9
    PUSH r8
    PUSH rdi
    PUSH rsi
    PUSH rbp
    PUSH rbx
    PUSH rdx
    PUSH rcx
    PUSH rax
    // Create space for FPRs and stack alignment padding.
    subq MACRO_LITERAL(8 + 16 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(8 + 16 * 8)
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save FPRs.
    movq %xmm0, 8(%rsp)
    movq %xmm1, 16(%rsp)
    movq %xmm2, 24(%rsp)
    movq %xmm3, 32(%rsp)
    movq %xmm4, 40(%rsp)
    movq %xmm5, 48(%rsp)
    movq %xmm6, 56(%rsp)
    movq %xmm7, 64(%rsp)
    movq %xmm8, 72(%rsp)
    movq %xmm9, 80(%rsp)
    movq %xmm10, 88(%rsp)
    movq %xmm11, 96(%rsp)
    movq %xmm12, 104(%rsp)
    movq %xmm13, 112(%rsp)
    movq %xmm14, 120(%rsp)
    movq %xmm15, 128(%rsp)
    // Push ArtMethod* for save everything frame method.
    pushq \runtime_method_offset(%r10)
    CFI_ADJUST_CFA_OFFSET(8)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_EVERYTHING != 15 * 8 + 16 * 8 + 16 + 8)
#error "FRAME_SIZE_SAVE_EVERYTHING(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when R15 is already saved.
     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    PUSH r14
    SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED \runtime_method_offset
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    PUSH r15
    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED \runtime_method_offset
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FPRS)
    // Restore FPRs. Method and padding are still on the stack.
    movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm8
    movq 88(%rsp), %xmm9
    movq 96(%rsp), %xmm10
    movq 104(%rsp), %xmm11
    movq 112(%rsp), %xmm12
    movq 120(%rsp), %xmm13
    movq 128(%rsp), %xmm14
    movq 136(%rsp), %xmm15
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX)
    // Restore callee and GPR args (except RAX), mixed together to agree with core spills bitmap.
    POP rcx
    POP rdx
    POP rbx
    POP rbp
    POP rsi
    POP rdi
    POP r8
    POP r9
    POP r10
    POP r11
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
    RESTORE_SAVE_EVERYTHING_FRAME_FPRS

    // Remove save everything callee save method, stack alignment padding and FPRs.
    addq MACRO_LITERAL(16 + 16 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8))

    POP rax
    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX)
    RESTORE_SAVE_EVERYTHING_FRAME_FPRS

    // Remove save everything callee save method, stack alignment padding and FPRs, skip RAX.
    addq MACRO_LITERAL(16 + 16 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8 + 8))

    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO
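    /*
     * Illustrative sketch only: how FRAME_SIZE_SAVE_EVERYTHING decomposes, matching
     * the compile-time check in SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED:
     *
     *   15 * 8   all GPRs except RSP (r15..rax, including caller-saved ones)
     *   16 * 8   all FPRs (xmm0..xmm15)
     *   8 + 8    stack alignment padding + the ArtMethod* pushed last
     *   8        implicit return address pushed by the caller
     *
     * The RESTORE_* variants above simply walk this layout back in reverse.
     */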
    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
     */
MACRO0(DELIVER_PENDING_EXCEPTION_FRAME_READY)
    // (Thread*) setup
    movq %gs:THREAD_SELF_OFFSET, %rdi
    call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*)
    UNREACHABLE
END_MACRO

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_.
     */
MACRO0(DELIVER_PENDING_EXCEPTION)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save callee saves for throw
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END_MACRO

MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg1, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg1, arg2, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
DEFINE_FUNCTION_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, 2 * __SIZEOF_POINTER__
    // Fault address and return address were saved by the fault handler.
    // Save all registers as basis for long jump context; R15 will replace fault address later.
    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
    // Retrieve fault address and save R15.
    movq (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp), %rdi
    movq %r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp)
    CFI_REL_OFFSET(%r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__))
    // Outgoing argument set up; RDI already contains the fault address.
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call SYMBOL(artThrowNullPointerExceptionFromSignal)  // (addr, self)
    UNREACHABLE
END_FUNCTION art_quick_throw_null_pointer_exception_from_signal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
    /*
     * Called by managed code; saves callee saves and then calls artDeliverExceptionFromCode,
     * which will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
     * the method_idx. This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/rsi.
     *
     * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting
     * of the target Method* in rax and method->code_ in rdx.
     *
     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the return
     * location on the stack.
     *
     * Adapted from x86 code.
     */
MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
    SETUP_SAVE_REFS_AND_ARGS_FRAME     // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, this_object, Thread*, SP)

    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread
    movq %rsp, %rcx                    // pass SP

    call CALLVAR(cxx_name)             // cxx_name(arg1, arg2, Thread*, SP)
    // Stash the target method in RDI and its code pointer in RAX.
    movq %rax, %rdi
    movq %rdx, %rax
    RESTORE_SAVE_REFS_AND_ARGS_FRAME

    testq %rdi, %rdi
    jz 1f

    // Tail call to intended method.
    jmp *%rax
1:
    DELIVER_PENDING_EXCEPTION
END_MACRO
MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    INVOKE_TRAMPOLINE_BODY RAW_VAR(cxx_name)
    END_FUNCTION VAR(c_name)
END_MACRO

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
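    /*
     * Illustrative sketch only (hypothetical C declaration, not the runtime's
     * actual one): the trampoline helpers conceptually return two machine words,
     * which the SysV x86-64 ABI places in RAX/RDX -- the registers that
     * INVOKE_TRAMPOLINE_BODY shuffles into RDI (method) and RAX (code) before the
     * tail call:
     *
     *   typedef struct {
     *     void* method;  // target ArtMethod*, returned in RAX
     *     void* code;    // method->code_, returned in RDX
     *   } TwoWordReturn;
     *
     *   TwoWordReturn artInvokeStaticTrampolineWithAccessCheck(
     *       uint32_t method_idx, void* this_object, void* self, void* sp);
     */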
    /*
     * Helper for quick invocation stub to set up XMM registers. Assumes r10 == shorty,
     * r11 == arg_array. Clobbers r10, r11 and al. Branches to xmm_setup_finished if it encounters
     * the end of the shorty.
     */
MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
1:  // LOOP
    movb (%r10), %al             // al := *shorty
    addq MACRO_LITERAL(1), %r10  // shorty++
    cmpb MACRO_LITERAL(0), %al   // if (al == '\0') goto xmm_setup_finished
    je VAR(finished)
    cmpb MACRO_LITERAL(68), %al  // if (al == 'D') goto FOUND_DOUBLE
    je 2f
    cmpb MACRO_LITERAL(70), %al  // if (al == 'F') goto FOUND_FLOAT
    je 3f
    addq MACRO_LITERAL(4), %r11  // arg_array++
    // Handle extra space in arg array taken by a long.
    cmpb MACRO_LITERAL(74), %al  // if (al != 'J') goto LOOP
    jne 1b
    addq MACRO_LITERAL(4), %r11  // arg_array++
    jmp 1b                       // goto LOOP
2:  // FOUND_DOUBLE
    movsd (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(8), %r11  // arg_array+=2
    jmp 4f
3:  // FOUND_FLOAT
    movss (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(4), %r11  // arg_array++
4:
END_MACRO

    /*
     * Helper for quick invocation stub to set up GPR registers. Assumes r10 == shorty,
     * r11 == arg_array. Clobbers r10, r11 and al. Branches to gpr_setup_finished if it encounters
     * the end of the shorty.
     */
MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished)
1:  // LOOP
    movb (%r10), %al             // al := *shorty
    addq MACRO_LITERAL(1), %r10  // shorty++
    cmpb MACRO_LITERAL(0), %al   // if (al == '\0') goto gpr_setup_finished
    je VAR(finished)
    cmpb MACRO_LITERAL(74), %al  // if (al == 'J') goto FOUND_LONG
    je 2f
    cmpb MACRO_LITERAL(70), %al  // if (al == 'F') goto SKIP_FLOAT
    je 3f
    cmpb MACRO_LITERAL(68), %al  // if (al == 'D') goto SKIP_DOUBLE
    je 4f
    movl (%r11), REG_VAR(gpr_reg32)
    addq MACRO_LITERAL(4), %r11  // arg_array++
    jmp 5f
2:  // FOUND_LONG
    movq (%r11), REG_VAR(gpr_reg64)
    addq MACRO_LITERAL(8), %r11  // arg_array+=2
    jmp 5f
3:  // SKIP_FLOAT
    addq MACRO_LITERAL(4), %r11  // arg_array++
    jmp 1b
4:  // SKIP_DOUBLE
    addq MACRO_LITERAL(8), %r11  // arg_array+=2
    jmp 1b
5:
END_MACRO
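    /*
     * Illustrative sketch only: the net effect of the two shorty loops above, as C
     * pseudocode (next_xmm/next_gpr/read4/read8 are hypothetical helpers). The
     * callers skip the shorty's first character, which encodes the return type:
     *
     *   void load_args(const char* shorty, const uint8_t* arg_array) {
     *     for (const char* s = shorty + 1; *s != '\0'; ++s) {
     *       switch (*s) {
     *         case 'D': next_xmm(read8(arg_array)); arg_array += 8; break;
     *         case 'F': next_xmm(read4(arg_array)); arg_array += 4; break;
     *         case 'J': next_gpr(read8(arg_array)); arg_array += 8; break;
     *         default:  next_gpr(read4(arg_array)); arg_array += 4; break;
     *       }
     *     }
     *   }
     *
     * In the real stubs the two loops are interleaved per register: each XMM loop
     * skips over integer args and each GPR loop skips over float/double args.
     */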
    /*
     * Quick invocation stub.
     * On entry:
     *   [sp] = return address
     *   rdi = method pointer
     *   rsi = argument array that must at least contain the this pointer.
     *   rdx = size of argument array in bytes
     *   rcx = (managed) thread pointer
     *   r8 = JValue* result
     *   r9 = char* shorty
     */
DEFINE_FUNCTION art_quick_invoke_stub
#if defined(__APPLE__)
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10   // R10 := shorty + 1; i.e. skip the return type character.
    leaq 4(%rsi), %r11  // R11 := arg_array + 4; i.e. skip the this pointer.
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished
    .balign 16
.Lxmm_setup_finished:
    PUSH rbp  // Save rbp.
    PUSH r8   // Save r8/result*.
    PUSH r9   // Save r9/shorty*.
    PUSH rbx  // Save native callee save rbx.
    PUSH r12  // Save native callee save r12.
    PUSH r13  // Save native callee save r13.
    PUSH r14  // Save native callee save r14.
    PUSH r15  // Save native callee save r15.
    movq %rsp, %rbp  // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d
    addl LITERAL(100), %edx         // Reserve space for return addr, StackReference<method>, rbp,
                                    // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx  // Align frame size to 16 bytes.
    subl LITERAL(72), %edx          // Remove space for return address, rbp, r8, r9, rbx, r12,
                                    // r13, r14, and r15.
    subq %rdx, %rsp                 // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)         // Store null for method*.

    movl %r10d, %ecx                // Place size of args in rcx.
    movq %rdi, %rax                 // rax := method to be called
    movq %rsi, %r11                 // r11 := arg_array
    leaq 8(%rsp), %rdi              // rdi points just above the ArtMethod* slot, at the stack
                                    // arguments.
    // Copy arg array into stack.
    rep movsb                       // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10               // r10 := shorty + 1; i.e. skip the return type character.
    movq %rax, %rdi                 // rdi := method to be called
    movl (%r11), %esi               // rsi := this pointer
    addq LITERAL(4), %r11           // arg_array++
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
.Lgpr_setup_finished:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)  // Call the method.
    movq %rbp, %rsp  // Restore stack pointer.
    POP r15  // Pop r15.
    POP r14  // Pop r14.
    POP r13  // Pop r13.
    POP r12  // Pop r12.
    POP rbx  // Pop rbx.
    POP r9   // Pop r9 - shorty*.
    POP r8   // Pop r8 - result*.
    POP rbp  // Pop rbp.
    cmpb LITERAL(68), (%r9)  // Test if result type char == 'D'.
    je .Lreturn_double_quick
    cmpb LITERAL(70), (%r9)  // Test if result type char == 'F'.
    je .Lreturn_float_quick
    movq %rax, (%r8)  // Store the result, assuming it's a long, int or Object*.
    ret
.Lreturn_double_quick:
    movsd %xmm0, (%r8)  // Store the double floating point result.
    ret
.Lreturn_float_quick:
    movss %xmm0, (%r8)  // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_stub
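    /*
     * Illustrative only: the frame-size arithmetic above, worked for a hypothetical
     * 40-byte argument array:
     *
     *   40 + 100 = 140     // args + headroom for return addr, StackReference<method>,
     *                      // rbp, r8, r9, rbx, r12, r13, r14, r15
     *   140 & ~15 = 128    // round down to a 16-byte multiple
     *   128 - 72 = 56      // drop the return address + 8 saved GPRs already pushed
     *
     * so `subq %rdx, %rsp` reserves 56 bytes here: the ArtMethod* slot, the copied
     * argument array, and padding that keeps the frame 16-byte aligned.
     */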
    /*
     * Quick invocation stub.
     * On entry:
     *   [sp] = return address
     *   rdi = method pointer
     *   rsi = argument array or null if no arguments.
     *   rdx = size of argument array in bytes
     *   rcx = (managed) thread pointer
     *   r8 = JValue* result
     *   r9 = char* shorty
     */
DEFINE_FUNCTION art_quick_invoke_static_stub
#if defined(__APPLE__)
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10  // R10 := shorty + 1; i.e. skip the return type character.
    movq %rsi, %r11    // R11 := arg_array
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished2
    .balign 16
.Lxmm_setup_finished2:
    PUSH rbp  // Save rbp.
    PUSH r8   // Save r8/result*.
    PUSH r9   // Save r9/shorty*.
    PUSH rbx  // Save rbx.
    PUSH r12  // Save r12.
    PUSH r13  // Save r13.
    PUSH r14  // Save r14.
    PUSH r15  // Save r15.
    movq %rsp, %rbp  // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d
    addl LITERAL(100), %edx         // Reserve space for return addr, StackReference<method>, rbp,
                                    // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx  // Align frame size to 16 bytes.
    subl LITERAL(72), %edx          // Remove space for return address, rbp, r8, r9, rbx, r12,
                                    // r13, r14, and r15.
    subq %rdx, %rsp                 // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)         // Store null for method*.

    movl %r10d, %ecx                // Place size of args in rcx.
    movq %rdi, %rax                 // rax := method to be called
    movq %rsi, %r11                 // r11 := arg_array
    leaq 8(%rsp), %rdi              // rdi points just above the ArtMethod* slot, at the stack
                                    // arguments.
    // Copy arg array into stack.
    rep movsb                       // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10               // r10 := shorty + 1; i.e. skip the return type character.
    movq %rax, %rdi                 // rdi := method to be called
    LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
.Lgpr_setup_finished2:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)  // Call the method.
    movq %rbp, %rsp  // Restore stack pointer.
    POP r15  // Pop r15.
    POP r14  // Pop r14.
    POP r13  // Pop r13.
    POP r12  // Pop r12.
    POP rbx  // Pop rbx.
    POP r9   // Pop r9 - shorty*.
    POP r8   // Pop r8 - result*.
    POP rbp  // Pop rbp.
    cmpb LITERAL(68), (%r9)  // Test if result type char == 'D'.
    je .Lreturn_double_quick2
    cmpb LITERAL(70), (%r9)  // Test if result type char == 'F'.
    je .Lreturn_float_quick2
    movq %rax, (%r8)  // Store the result, assuming it's a long, int or Object*.
    ret
.Lreturn_double_quick2:
    movsd %xmm0, (%r8)  // Store the double floating point result.
    ret
.Lreturn_float_quick2:
    movss %xmm0, (%r8)  // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_static_stub

    /*
     * Long jump stub.
     * On entry:
     *   rdi = gprs
     *   rsi = fprs
     */
DEFINE_FUNCTION art_quick_do_long_jump
#if defined(__APPLE__)
    int3
    int3
#else
    // Restore FPRs.
    movq 0(%rsi), %xmm0
    movq 8(%rsi), %xmm1
    movq 16(%rsi), %xmm2
    movq 24(%rsi), %xmm3
    movq 32(%rsi), %xmm4
    movq 40(%rsi), %xmm5
    movq 48(%rsi), %xmm6
    movq 56(%rsi), %xmm7
    movq 64(%rsi), %xmm8
    movq 72(%rsi), %xmm9
    movq 80(%rsi), %xmm10
    movq 88(%rsi), %xmm11
    movq 96(%rsi), %xmm12
    movq 104(%rsi), %xmm13
    movq 112(%rsi), %xmm14
    movq 120(%rsi), %xmm15
    // Restore GPRs.
    movq %rdi, %rsp   // RSP points to gprs.
    // Load all registers except RSP and RIP with values in gprs.
    popq %r15
    popq %r14
    popq %r13
    popq %r12
    popq %r11
    popq %r10
    popq %r9
    popq %r8
    popq %rdi
    popq %rsi
    popq %rbp
    addq LITERAL(8), %rsp  // Skip rsp.
    popq %rbx
    popq %rdx
    popq %rcx
    popq %rax
    popq %rsp  // Load stack pointer.
    ret        // From higher in the stack pop rip.
#endif  // __APPLE__
END_FUNCTION art_quick_do_long_jump
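    /*
     * Illustrative sketch only, inferred from the pops above: the caller passes
     *
     *   uint64_t gprs[17];  // [0..15]: r15, r14, r13, r12, r11, r10, r9, r8, rdi,
     *                       // rsi, rbp, (skipped rsp slot), rbx, rdx, rcx, rax;
     *                       // [16]: the new RSP value consumed by `popq %rsp`
     *   uint64_t fprs[16];  // xmm0..xmm15
     *
     * The new RIP is not in gprs[]; it must already be stored at *new_rsp so the
     * final `ret` pops it as the jump target.
     */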
MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME         // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME       // restore frame up to return address
    CALL_MACRO(return_macro)           // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME         // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME       // restore frame up to return address
    CALL_MACRO(return_macro)           // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME         // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg0, arg1, arg2, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME       // restore frame up to return address
    CALL_MACRO(return_macro)           // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME         // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %r8   // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg0, arg1, arg2, arg3, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME       // restore frame up to return address
    CALL_MACRO(return_macro)           // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
    // arg0 is in rdi
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME       // restore frame up to return address
    CALL_MACRO(return_macro)
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
    // arg0 and arg1 are in rdi/rsi
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call CALLVAR(cxx_name)             // (arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME       // restore frame up to return address
    CALL_MACRO(return_macro)
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
    // arg0, arg1, and arg2 are in rdi/rsi/rdx
    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg0, arg1, arg2, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME       // restore frame up to return address
    CALL_MACRO(return_macro)           // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
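    /*
     * Illustrative sketch only: every downcall macro above follows the same
     * convention -- managed arguments stay where the managed ABI put them and
     * Thread::Current() is appended as the last C argument. E.g. TWO_ARG_DOWNCALL
     * behaves like this hypothetical C wrapper:
     *
     *   uintptr_t stub(uintptr_t a0, uintptr_t a1) {    // a0 in RDI, a1 in RSI
     *     return cxx_name(a0, a1, Thread::Current());   // Thread* in RDX
     *   }
     *
     * with the save-refs-only frame built around the call so the GC can find the
     * spilled references.
     */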
    /*
     * Macro for resolution and initialization of indexed DEX file
     * constants such as classes and strings.
     */
MACRO3(ONE_ARG_SAVE_EVERYTHING_DOWNCALL, c_name, cxx_name, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset  // save everything for GC
    // Outgoing argument set up
    movl %eax, %edi                    // pass the index of the constant as arg0
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg0, Thread*)
    testl %eax, %eax                   // If result is null, deliver the OOME.
    jz 1f
    CFI_REMEMBER_STATE
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX  // restore frame up to return address
    ret
    CFI_RESTORE_STATE
    CFI_DEF_CFA(rsp, FRAME_SIZE_SAVE_EVERYTHING)  // workaround for clang bug: 31975598
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT, c_name, cxx_name)
    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \c_name, \cxx_name, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
END_MACRO

MACRO0(RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER)
    testq %rax, %rax  // rax == 0 ?
    jz 1f             // if rax == 0 goto 1
    ret               // return
1:                    // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO

MACRO0(RETURN_IF_EAX_ZERO)
    testl %eax, %eax  // eax == 0 ?
    jnz 1f            // if eax != 0 goto 1
    ret               // return
1:                    // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO

MACRO0(RETURN_OR_DELIVER_PENDING_EXCEPTION)
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx  // get exception field
    testq %rcx, %rcx  // rcx == 0 ?
    jnz 1f            // if rcx != 0 goto 1
    ret               // return
1:                    // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO
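    /*
     * Illustrative sketch only: the contract encoded by the three RETURN_* macros
     * above, as C pseudocode (the entrypoints set self->exception_ on failure):
     *
     *   result = cxx_name(...);
     *   if (result == 0) {            // RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
     *     DeliverPendingException();  // never returns
     *   }
     *   return result;
     *
     * RETURN_IF_EAX_ZERO inverts the test (0 means success), and
     * RETURN_OR_DELIVER_PENDING_EXCEPTION checks self->exception_ directly instead
     * of the return value.
     */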
// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS

// Comment out allocators that have x86_64 specific asm.
// Region TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
// Normal TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)


// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
MACRO2(ART_QUICK_ALLOC_OBJECT_ROSALLOC, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    // Fast path rosalloc allocation.
    // RDI: mirror::Class*, RAX: return value.
    // RSI, RDX, RCX, R8, R9: free.
    // Check if the thread local allocation stack has room.
    movq %gs:THREAD_SELF_OFFSET, %r8                     // r8 = thread
    movq THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx  // rcx = alloc stack top.
    cmpq THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
    jae .Lslow_path\c_name
    // Load the object size.
    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %eax
    // Check if the size is for a thread local allocation.
    // Also does the initialized and finalizable checks.
    cmpl LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
    ja .Lslow_path\c_name
    // Compute the rosalloc bracket index from the size.
    shrq LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
    // Load the rosalloc run (r9). Subtract __SIZEOF_POINTER__ to subtract one from
    // rax, as there is no 0 byte run and the size is already aligned.
    movq (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%r8, %rax, __SIZEOF_POINTER__), %r9
    // Load the free list head (rax). This will be the return val.
    movq (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
    testq %rax, %rax
    jz .Lslow_path\c_name
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
    // Push the new object onto the thread local allocation stack and
    // increment the thread local allocation stack top.
    movl %eax, (%rcx)
    addq LITERAL(COMPRESSED_REFERENCE_SIZE), %rcx
    movq %rcx, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8)
    // Load the next pointer of the head and update the list head with the next pointer.
    movq ROSALLOC_SLOT_NEXT_OFFSET(%rax), %rcx
    movq %rcx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9)
    // Store the class pointer in the header. This also overwrites the next pointer.
    // The offsets are asserted to match.
#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    // Decrement the size of the free list.
    decl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
    // No fence necessary for x86.
    ret
.Lslow_path\c_name:
    SETUP_SAVE_REFS_ONLY_FRAME               // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi        // pass Thread::Current()
    call CALLVAR(cxx_name)                   // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME             // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER  // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc
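    /*
     * Illustrative sketch only: the RosAlloc fast path above as C pseudocode
     * (field and helper names are hypothetical; constants as in the code):
     *
     *   if (self->alloc_stack_top_ >= self->alloc_stack_end_) return slow_path();
     *   size_t size = klass->object_size_alloc_fast_path_;        // aligned size
     *   if (size > ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE) return slow_path();
     *   size_t idx = size >> ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT; // bracket index
     *   Run* run = self->rosalloc_runs_[idx - 1];  // no 0-byte run, hence the
     *                                              // -__SIZEOF_POINTER__ above
     *   Slot* slot = run->free_list_.head_;
     *   if (slot == nullptr) return slow_path();
     *   *self->alloc_stack_top_++ = slot;          // push on the alloc stack
     *   run->free_list_.head_ = slot->next_;       // pop the free list
     *   slot->klass_ = klass;                      // class ptr overwrites next ptr
     *   run->free_list_.size_--;
     *   return slot;
     */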
// The common fast path code for art_quick_alloc_object_resolved_region_tlab.
// TODO: delete ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH since it is the same as
// ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH.
//
// RDI: the class, RAX: return value.
// RCX, RSI, RDX: scratch, r8: Thread::Current().
MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel)
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel))
END_MACRO

// The fast path code for art_quick_alloc_object_initialized_region_tlab.
//
// RDI: the class, RAX: return value.
// RCX, RSI, RDX: scratch, r8: Thread::Current().
MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel)
    movq %gs:THREAD_SELF_OFFSET, %r8  // r8 = thread
    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %ecx  // Load the object size.
    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax
    addq %rax, %rcx                          // Add size to pos; note that these are both
                                             // 32 bit ints, so an overflow will make the add
                                             // land past the end of the thread local region.
    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx  // Check if it fits.
    ja RAW_VAR(slowPathLabel)
    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)  // Update thread_local_pos.
    incq THREAD_LOCAL_OBJECTS_OFFSET(%r8)    // Increase thread_local_objects.
    // Store the class pointer in the header.
    // No fence needed for x86.
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    ret                                      // Fast path succeeded.
END_MACRO
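    /*
     * Illustrative sketch only: the TLAB fast path above is a classic bump-pointer
     * allocator; in C pseudocode (field names approximate):
     *
     *   uintptr_t pos = self->tlab_pos_;
     *   uintptr_t new_pos = pos + klass->object_size_alloc_fast_path_;
     *   if (new_pos > self->tlab_end_) return slow_path();
     *   self->tlab_pos_ = new_pos;
     *   self->tlab_objects_++;
     *   ((Object*)pos)->klass_ = klass;  // no fence needed on x86's TSO model
     *   return (Object*)pos;
     */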
// The fast path code for art_quick_alloc_array_region_tlab.
// Inputs: RDI: the class, RSI: int32_t component_count, R9: total_size
// Free temps: RCX, RDX, R8
// Output: RAX: return value.
MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE, slowPathLabel)
    movq %gs:THREAD_SELF_OFFSET, %rcx  // rcx = thread
    // Mask out the unaligned part to make sure we are 8 byte aligned.
    andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %r9
    movq THREAD_LOCAL_POS_OFFSET(%rcx), %rax
    addq %rax, %r9
    cmpq THREAD_LOCAL_END_OFFSET(%rcx), %r9  // Check if it fits.
    ja RAW_VAR(slowPathLabel)
    movq %r9, THREAD_LOCAL_POS_OFFSET(%rcx)  // Update thread_local_pos.
    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%rcx)  // Increase thread_local_objects.
    // Store the class pointer in the header.
    // No fence needed for x86.
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    movl %esi, MIRROR_ARRAY_LENGTH_OFFSET(%rax)
    ret                                      // Fast path succeeded.
END_MACRO

// The common slow path code for art_quick_alloc_object_{resolved, initialized}_tlab
// and art_quick_alloc_object_{resolved, initialized}_region_tlab.
MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
    SETUP_SAVE_REFS_ONLY_FRAME               // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi        // pass Thread::Current()
    call CALLVAR(cxx_name)                   // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME             // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER  // return or deliver exception
END_MACRO

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be
// called with CC if the GC is not active.
DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
.Lart_quick_alloc_object_resolved_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB
END_FUNCTION art_quick_alloc_object_resolved_tlab

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB).
// May be called with CC if the GC is not active.
DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path
.Lart_quick_alloc_object_initialized_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB
END_FUNCTION art_quick_alloc_object_initialized_tlab

MACRO0(COMPUTE_ARRAY_SIZE_UNKNOWN)
    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rdi), %ecx         // Load component type.
    UNPOISON_HEAP_REF ecx
    movl MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%rcx), %ecx  // Load primitive type.
    shrq MACRO_LITERAL(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT), %rcx   // Get component size shift.
    movq %rsi, %r9
    salq %cl, %r9                                               // Calculate array count shifted.
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
    // Add 4 extra bytes if we are doing a long array.
    addq MACRO_LITERAL(1), %rcx
    andq MACRO_LITERAL(4), %rcx
#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
#error Long array data offset must be 4 greater than int array data offset.
#endif
    addq %rcx, %r9
END_MACRO
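    /*
     * Illustrative sketch only: the branch-free "+4 bytes for 64-bit components"
     * trick above, in C. The component size shift is 0..3, so (shift + 1) & 4 is
     * nonzero exactly when shift == 3 (long/double arrays):
     *
     *   size_t size = (size_t)component_count << shift;
     *   size += MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK;  // header + rounding
     *   size += (shift + 1) & 4;  // 4 iff shift == 3, matching the wider data offset
     */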
MACRO0(COMPUTE_ARRAY_SIZE_8)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO

MACRO0(COMPUTE_ARRAY_SIZE_16)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(1), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO

MACRO0(COMPUTE_ARRAY_SIZE_32)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(2), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO

MACRO0(COMPUTE_ARRAY_SIZE_64)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(3), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO

MACRO3(GENERATE_ALLOC_ARRAY_TLAB, c_entrypoint, cxx_name, size_setup)
    DEFINE_FUNCTION VAR(c_entrypoint)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    CALL_MACRO(size_setup)
    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\c_entrypoint
.Lslow_path\c_entrypoint:
    SETUP_SAVE_REFS_ONLY_FRAME               // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx        // pass Thread::Current()
    call CALLVAR(cxx_name)                   // cxx_name(arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME             // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER  // return or deliver exception
    END_FUNCTION VAR(c_entrypoint)
END_MACRO


GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64

GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB).
DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab
    // Fast path region tlab allocation.
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ASSERT_USE_READ_BARRIER
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
.Lart_quick_alloc_object_resolved_region_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB
END_FUNCTION art_quick_alloc_object_resolved_region_tlab

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB).
DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab
    // Fast path region tlab allocation.
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ASSERT_USE_READ_BARRIER
    // No read barrier since the caller is responsible for that.
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
.Lart_quick_alloc_object_initialized_region_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB
END_FUNCTION art_quick_alloc_object_initialized_region_tlab
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode

TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO

DEFINE_FUNCTION art_quick_lock_object
    testl %edi, %edi  // Null check object/rdi.
    jz .Lslow_lock
.Lretry_lock:
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx   // ecx := lock word.
    test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx  // Test the 2 high bits.
    jne .Lslow_lock   // Slow path if either of the two high bits are set.
    movl %ecx, %edx   // Save lock word (edx) to keep the read barrier bits.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // Zero the gc bits.
    test %ecx, %ecx
    jnz .Lalready_thin  // Lock word contains a thin lock.
    // Unlocked case - edx: original lock word, edi: obj.
    movl %edx, %eax   // eax: lock word zero except for read barrier bits.
    movl %gs:THREAD_ID_OFFSET, %edx  // edx := thread id
    or %eax, %edx     // edx: thread id with count of 0 + read barrier bits.
    lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
    jnz .Lretry_lock  // cmpxchg failed, retry.
    ret
.Lalready_thin:  // edx: lock word (with high 2 bits zero and original rb bits), edi: obj.
    movl %gs:THREAD_ID_OFFSET, %ecx  // ecx := thread id
    cmpw %cx, %dx     // Do we hold the lock already?
    jne .Lslow_lock
    movl %edx, %ecx   // Copy the lock word to check count overflow.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // Zero the gc bits.
    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // Increment recursion count.
    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // Overflowed if the upper bit (28) is set.
    jne .Lslow_lock   // Count overflowed, so go slow.
    movl %edx, %eax   // Copy the lock word as the old val for cmpxchg.
    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx  // Increment recursion count again, for real.
    // Update lockword; cmpxchg is necessary for the read barrier bits.
    lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, edx: new val.
    jnz .Lretry_lock  // cmpxchg failed, retry.
    ret
.Lslow_lock:
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi   // pass Thread::Current()
    call SYMBOL(artLockObjectFromCode)  // artLockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object
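    /*
     * Illustrative sketch only: the thin-lock fast path above as C pseudocode
     * (owner_id/count_overflowed are hypothetical helpers; CompareAndSwap stands
     * in for `lock cmpxchg`, which is what preserves the read barrier bits safely):
     *
     *   retry:
     *     uint32_t lw = obj->lock_word_;
     *     if (lw & LOCK_WORD_STATE_MASK_SHIFTED) return slow_path();  // fat lock etc.
     *     if ((lw & LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED) == 0) {
     *       // Unlocked: install our thread id, keeping the gc/read-barrier bits.
     *       if (!CompareAndSwap(&obj->lock_word_, lw, lw | self->thin_lock_id_))
     *         goto retry;
     *     } else if (owner_id(lw) == self->thin_lock_id_) {
     *       uint32_t nlw = lw + LOCK_WORD_THIN_LOCK_COUNT_ONE;  // recursive lock
     *       if (count_overflowed(nlw)) return slow_path();
     *       if (!CompareAndSwap(&obj->lock_word_, lw, nlw)) goto retry;
     *     } else {
     *       return slow_path();  // held by another thread
     *     }
     */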
DEFINE_FUNCTION art_quick_lock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi   // pass Thread::Current()
    call SYMBOL(artLockObjectFromCode)  // artLockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object_no_inline

DEFINE_FUNCTION art_quick_unlock_object
    testl %edi, %edi  // Null check object/edi.
    jz .Lslow_unlock
.Lretry_unlock:
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word
    movl %gs:THREAD_ID_OFFSET, %edx  // edx := thread id
    test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx
    jnz .Lslow_unlock  // Lock word contains a monitor.
    cmpw %cx, %dx      // Does the thread id match?
    jne .Lslow_unlock
    movl %ecx, %edx    // Copy the lock word to detect new count of 0.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx  // Zero the gc bits.
    cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
    jae .Lrecursive_thin_unlock
    // Update lockword; cmpxchg is necessary for the read barrier bits.
    movl %ecx, %eax    // eax: old lock word.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx  // ecx: new lock word, zero except original gc bits.
#ifndef USE_READ_BARRIER
    movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
#else
    lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
    jnz .Lretry_unlock  // cmpxchg failed, retry.
#endif
    ret
.Lrecursive_thin_unlock:  // ecx: original lock word, edi: obj.
    // Update lockword; cmpxchg is necessary for the read barrier bits.
    movl %ecx, %eax    // eax: old lock word.
    subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx
#ifndef USE_READ_BARRIER
    mov %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
#else
    lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
    jnz .Lretry_unlock  // cmpxchg failed, retry.
#endif
    ret
.Lslow_unlock:
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_unlock_object

DEFINE_FUNCTION art_quick_unlock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_unlock_object_no_inline
DEFINE_FUNCTION art_quick_check_instance_of
    // Type check using the bit string passes null as the target class. In that case just throw.
    testl %esi, %esi
    jz .Lthrow_class_cast_exception_for_bitstring_check

    // We could check the super classes here but that is usually already checked in the caller.
    PUSH rdi                            // Save args in case of exception.
    PUSH rsi
    subq LITERAL(8), %rsp               // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    SETUP_FP_CALLEE_SAVE_FRAME
    call SYMBOL(artInstanceOfFromCode)  // (Object* obj, Class* ref_klass)
    testq %rax, %rax
    jz .Lthrow_class_cast_exception     // jump forward if not assignable
    CFI_REMEMBER_STATE
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(24), %rsp              // pop arguments
    CFI_ADJUST_CFA_OFFSET(-24)
    ret
    CFI_RESTORE_STATE                   // Reset unwind info so following code unwinds.

.Lthrow_class_cast_exception:
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(8), %rsp               // pop padding
    CFI_ADJUST_CFA_OFFSET(-8)
    POP rsi                             // Pop arguments.
    POP rdi

.Lthrow_class_cast_exception_for_bitstring_check:
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME   // save all registers as basis for long jump context
    mov %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
    call SYMBOL(artThrowClassCastExceptionForObject)  // (Object* src, Class* dest, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_check_instance_of


// Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
MACRO2(POP_REG_NE, reg, exclude_reg)
    .ifc RAW_VAR(reg), RAW_VAR(exclude_reg)
      addq MACRO_LITERAL(8), %rsp
      CFI_ADJUST_CFA_OFFSET(-8)
    .else
      POP RAW_VAR(reg)
    .endif
END_MACRO

    /*
     * Macro to insert read barrier, used in art_quick_aput_obj.
     * obj_reg and dest_reg{32|64} are registers, offset is a defined literal such as
     * MIRROR_OBJECT_CLASS_OFFSET. dest_reg needs two versions to handle the mismatch between
     * 64b PUSH/POP and 32b argument.
     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
     *
     * As with the art_quick_aput_obj function, the 64b versions are in comments.
     */
 /*
  * Macro to insert read barrier, used in art_quick_aput_obj.
  * obj_reg and dest_reg{32|64} are registers, offset is a defined literal such as
  * MIRROR_OBJECT_CLASS_OFFSET. dest_reg needs two versions to handle the mismatch between
  * 64b PUSH/POP and 32b argument.
  * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
  *
  * As with the art_quick_aput_obj function, the 64b versions are in comments.
  */
MACRO4(READ_BARRIER, obj_reg, offset, dest_reg32, dest_reg64)
#ifdef USE_READ_BARRIER
    PUSH rax                              // save registers that might be used
    PUSH rdi
    PUSH rsi
    PUSH rdx
    PUSH rcx
    SETUP_FP_CALLEE_SAVE_FRAME
    // Outgoing argument set up
    // movl REG_VAR(ref_reg32), %edi      // pass ref, no-op for now since parameter ref is unused
    // // movq REG_VAR(ref_reg64), %rdi
    movl REG_VAR(obj_reg), %esi           // pass obj_reg
    // movq REG_VAR(obj_reg), %rsi
    movl MACRO_LITERAL((RAW_VAR(offset))), %edx  // pass offset, double parentheses are necessary
    // movq MACRO_LITERAL((RAW_VAR(offset))), %rdx
    call SYMBOL(artReadBarrierSlow)       // artReadBarrierSlow(ref, obj_reg, offset)
    // No need to unpoison return value in rax, artReadBarrierSlow() would do the unpoisoning.
    .ifnc RAW_VAR(dest_reg32), eax
    // .ifnc RAW_VAR(dest_reg64), rax
    movl %eax, REG_VAR(dest_reg32)        // save loaded ref in dest_reg
    // movq %rax, REG_VAR(dest_reg64)
    .endif
    RESTORE_FP_CALLEE_SAVE_FRAME
    POP_REG_NE rcx, RAW_VAR(dest_reg64)   // Restore registers except dest_reg
    POP_REG_NE rdx, RAW_VAR(dest_reg64)
    POP_REG_NE rsi, RAW_VAR(dest_reg64)
    POP_REG_NE rdi, RAW_VAR(dest_reg64)
    POP_REG_NE rax, RAW_VAR(dest_reg64)
#else
    movl RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg32)
    // movq RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg64)
    UNPOISON_HEAP_REF RAW_VAR(dest_reg32) // UNPOISON_HEAP_REF only takes a 32b register
#endif  // USE_READ_BARRIER
END_MACRO

DEFINE_FUNCTION art_quick_aput_obj
    testl %edx, %edx                      // store of null
    // test %rdx, %rdx
    jz .Ldo_aput_null
    READ_BARRIER edi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
    // READ_BARRIER rdi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
    READ_BARRIER ecx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
    // READ_BARRIER rcx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
    READ_BARRIER edx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax  // rax is free.
    // READ_BARRIER rdx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax
    cmpl %eax, %ecx                       // value's type == array's component type - trivial assignability
#else
    cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ecx  // value's type == array's component type - trivial assignability
    // cmpq MIRROR_OBJECT_CLASS_OFFSET(%rdx), %rcx
#endif
    jne .Lcheck_assignability
.Ldo_aput:
    POISON_HEAP_REF edx
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
    // movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
    // shrq LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
    movb %dl, (%rdx, %rdi)                // Note: this assumes that top 32b of %rdi are zero
    ret
.Ldo_aput_null:
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
    // movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    ret
.Lcheck_assignability:
    // Save arguments.
    PUSH rdi
    PUSH rsi
    PUSH rdx
    SETUP_FP_CALLEE_SAVE_FRAME

#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
    // The load of MIRROR_OBJECT_CLASS_OFFSET(%edx) is redundant, eax still holds the value.
    movl %eax, %esi                       // Pass arg2 = value's class.
    // movq %rax, %rsi
#else
    // "Uncompress" = do nothing, as already zero-extended on load.
    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %esi  // Pass arg2 = value's class.
#endif
    movq %rcx, %rdi                       // Pass arg1 = array's component type.

    call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)

    // Exception?
    testq %rax, %rax
    jz .Lthrow_array_store_exception

    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    POP rdx
    POP rsi
    POP rdi

    POISON_HEAP_REF edx
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
    // movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
    // shrq LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
    movb %dl, (%rdx, %rdi)                // Note: this assumes that top 32b of %rdi are zero
    ret
    CFI_ADJUST_CFA_OFFSET(24 + 4 * 8)     // Reset unwind info so following code unwinds.
.Lthrow_array_store_exception:
    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    POP rdx
    POP rsi
    POP rdi

    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME     // Save all registers as basis for long jump context.

    // Outgoing argument set up.
    movq %rdx, %rsi                       // Pass arg 2 = value.
    movq %gs:THREAD_SELF_OFFSET, %rdx     // Pass arg 3 = Thread::Current().
                                          // Pass arg 1 = array (already in RDI).
    call SYMBOL(artThrowArrayStoreException)  // (array, value, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_aput_obj
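    /*
     * Editorial sketch (not authoritative): .Ldo_aput above is the store plus
     * a card-table write barrier, roughly:
     *
     *   array->data[index] = value;                     // poisoned if enabled
     *   uint8_t* cards = self->card_table;              // %gs:THREAD_CARD_TABLE_OFFSET
     *   cards[(uintptr_t)array >> CARD_TABLE_CARD_SHIFT] = (uint8_t)(uintptr_t)cards;
     *
     * Storing the low byte of the card-table base works because the biased
     * table base is placed so that its low byte equals the dirty-card value.
     */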
// TODO: This is quite silly on X86_64 now.
DEFINE_FUNCTION art_quick_memcpy
    call PLT_SYMBOL(memcpy)               // (void*, const void*, size_t)
    ret
END_FUNCTION art_quick_memcpy

DEFINE_FUNCTION art_quick_test_suspend
    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET  // save everything for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi     // pass Thread::Current()
    call SYMBOL(artTestSuspendFromCode)   // (Thread*)
    RESTORE_SAVE_EVERYTHING_FRAME         // restore frame up to return address
    ret
END_FUNCTION art_quick_test_suspend

UNIMPLEMENTED art_quick_ldiv
UNIMPLEMENTED art_quick_lmod
UNIMPLEMENTED art_quick_lmul
UNIMPLEMENTED art_quick_lshl
UNIMPLEMENTED art_quick_lshr
UNIMPLEMENTED art_quick_lushr

// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.
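// Editorial note (sketch, not the authoritative expansion): each
// *_ARG_REF_DOWNCALL stub below sets up the kSaveRefsOnly frame, appends
// Thread::Current() as the final argument, calls the named C++ entrypoint and
// applies the given return macro. Schematically (see the macro definitions
// earlier in this file for the real expansion):
//
//   DEFINE_FUNCTION c_name
//       SETUP_SAVE_REFS_ONLY_FRAME
//       movq %gs:THREAD_SELF_OFFSET, <next-arg-reg>  // pass Thread::Current()
//       call SYMBOL(cxx_name)
//       RESTORE_SAVE_REFS_ONLY_FRAME
//       return_macro                     // RETURN_IF_EAX_ZERO, etc.
//   END_FUNCTION c_name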
THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_EAX_ZERO

TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION

TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set64_static, artSet64StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_EAX_ZERO

ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION

DEFINE_FUNCTION art_quick_proxy_invoke_handler
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI

    movq %gs:THREAD_SELF_OFFSET, %rdx     // Pass Thread::Current().
    movq %rsp, %rcx                       // Pass SP.
    call SYMBOL(artQuickProxyInvokeHandler)  // (proxy method, receiver, Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                      // Copy return value in case of float returns.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_proxy_invoke_handler
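// Editorial note: the proxy stub cannot know the method's return type, so the
// GPR result in RAX is unconditionally mirrored into XMM0; callers expecting
// a float/double result read XMM0, everyone else reads RAX.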
 /*
  * Called to resolve an imt conflict.
  * rdi is the conflict ArtMethod.
  * rax is a hidden argument that holds the target interface method's dex method index.
  *
  * Note that this stub writes to r10, r11, rax and rdi.
  */
DEFINE_FUNCTION art_quick_imt_conflict_trampoline
#if defined(__APPLE__)
    int3
    int3
#else
    movq __SIZEOF_POINTER__(%rsp), %r10   // Load referrer.
    mov %eax, %r11d                       // Remember method index in R11.
    PUSH rdx                              // Preserve RDX as we need to clobber it by LOCK CMPXCHG16B.
    // If the method is obsolete, just go through the dex cache miss slow path.
    // The obsolete flag is set with suspended threads, so we do not need an acquire operation here.
    testl LITERAL(ACC_OBSOLETE_METHOD), ART_METHOD_ACCESS_FLAGS_OFFSET(%r10)
    jnz .Limt_conflict_trampoline_dex_cache_miss
    movl ART_METHOD_DECLARING_CLASS_OFFSET(%r10), %r10d  // Load declaring class (no read barrier).
    movl MIRROR_CLASS_DEX_CACHE_OFFSET(%r10), %r10d      // Load the DexCache (without read barrier).
    UNPOISON_HEAP_REF r10d
    movq MIRROR_DEX_CACHE_RESOLVED_METHODS_OFFSET(%r10), %r10  // Load the resolved methods.
    andl LITERAL(METHOD_DEX_CACHE_SIZE_MINUS_ONE), %eax  // Calculate DexCache method slot index.
    shll LITERAL(1), %eax                 // Multiply by 2 as entries have size 2 * __SIZEOF_POINTER__.
    leaq 0(%r10, %rax, __SIZEOF_POINTER__), %r10  // Load DexCache method slot address.
    mov %rcx, %rdx                        // Make RDX:RAX == RCX:RBX so that LOCK CMPXCHG16B makes no changes.
    mov %rbx, %rax                        // (The actual value does not matter.)
    lock cmpxchg16b (%r10)                // Relaxed atomic load RDX:RAX from the dex cache slot.
    movq ART_METHOD_JNI_OFFSET_64(%rdi), %rdi  // Load ImtConflictTable.
    cmp %rdx, %r11                        // Compare method index to see if we had a DexCache method hit.
    jne .Limt_conflict_trampoline_dex_cache_miss
.Limt_table_iterate:
    cmpq %rax, 0(%rdi)
    jne .Limt_table_next_entry
    // We successfully hit an entry in the table. Load the target method
    // and jump to it.
    movq __SIZEOF_POINTER__(%rdi), %rdi
    CFI_REMEMBER_STATE
    POP rdx
    jmp *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)
    CFI_RESTORE_STATE
.Limt_table_next_entry:
    // If the entry is null, the interface method is not in the ImtConflictTable.
    cmpq LITERAL(0), 0(%rdi)
    jz .Lconflict_trampoline
    // Iterate over the entries of the ImtConflictTable.
    addq LITERAL(2 * __SIZEOF_POINTER__), %rdi
    jmp .Limt_table_iterate
.Lconflict_trampoline:
    // Call the runtime stub to populate the ImtConflictTable and jump to the
    // resolved method.
    CFI_REMEMBER_STATE
    POP rdx
    movq %rax, %rdi                       // Load interface method.
    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
    CFI_RESTORE_STATE
.Limt_conflict_trampoline_dex_cache_miss:
    // We're not creating a proper runtime method frame here,
    // artLookupResolvedMethod() is not allowed to walk the stack.

    // Save GPR args and ImtConflictTable; RDX is already saved.
    PUSH r9                               // Quick arg 5.
    PUSH r8                               // Quick arg 4.
    PUSH rsi                              // Quick arg 1.
    PUSH rcx                              // Quick arg 3.
    PUSH rdi                              // ImtConflictTable.
    // Save FPR args and callee-saves, align stack to 16B.
    subq MACRO_LITERAL(12 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(12 * 8 + 8)
    movq %xmm0, 0(%rsp)
    movq %xmm1, 8(%rsp)
    movq %xmm2, 16(%rsp)
    movq %xmm3, 24(%rsp)
    movq %xmm4, 32(%rsp)
    movq %xmm5, 40(%rsp)
    movq %xmm6, 48(%rsp)
    movq %xmm7, 56(%rsp)
    movq %xmm12, 64(%rsp)                 // XMM12-15 are callee-save in ART compiled code ABI
    movq %xmm13, 72(%rsp)                 // but caller-save in native ABI.
    movq %xmm14, 80(%rsp)
    movq %xmm15, 88(%rsp)

    movq %r11, %rdi                       // Pass method index.
    movq 12 * 8 + 8 + 6 * 8 + 8(%rsp), %rsi  // Pass referrer.
    call SYMBOL(artLookupResolvedMethod)  // (uint32_t method_index, ArtMethod* referrer)

    // Restore FPRs.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    movq 64(%rsp), %xmm12
    movq 72(%rsp), %xmm13
    movq 80(%rsp), %xmm14
    movq 88(%rsp), %xmm15
    addq MACRO_LITERAL(12 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(12 * 8 + 8))
    // Restore ImtConflictTable and GPR args.
    POP rdi
    POP rcx
    POP rsi
    POP r8
    POP r9

    cmp LITERAL(0), %rax                  // If the method wasn't resolved,
    je .Lconflict_trampoline              //   skip the lookup and go to artInvokeInterfaceTrampoline().
    jmp .Limt_table_iterate
#endif  // __APPLE__
END_FUNCTION art_quick_imt_conflict_trampoline
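    /*
     * Editorial sketch (not authoritative): the table walk above is a linear
     * scan over (interface_method, implementation_method) pairs terminated by
     * a null sentinel:
     *
     *   for (ArtMethod** entry = table; ; entry += 2) {
     *     if (entry[0] == interface_method) return entry[1]->quick_code;  // hit
     *     if (entry[0] == NULL) break;              // miss -> runtime stub
     *   }
     *   artInvokeInterfaceTrampoline(...);          // populate table, resolve
     */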
DEFINE_FUNCTION art_quick_resolution_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rdx
    movq %rsp, %rcx
    call SYMBOL(artQuickResolutionTrampoline)  // (called, receiver, Thread*, SP)
    movq %rax, %r10                       // Remember returned code pointer in R10.
    movq (%rsp), %rdi                     // Load called method into RDI.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    testq %r10, %r10                      // If code pointer is null goto deliver pending exception.
    jz 1f
    jmp *%r10                             // Tail call into method.
1:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_resolution_trampoline
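// Editorial note: artQuickResolutionTrampoline resolves the callee and returns
// its entry point (null on exception); the runtime call also updates the
// frame's ArtMethod* slot, which is why RDI is reloaded from (%rsp) before
// the tail call.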
/* Generic JNI frame layout:
 *
 * #-------------------#
 * |                   |
 * | caller method...  |
 * #-------------------#    <--- SP on entry
 *
 *          |
 *          V
 *
 * #-------------------#
 * | caller method...  |
 * #-------------------#
 * | Return            |
 * | R15               |    callee save
 * | R14               |    callee save
 * | R13               |    callee save
 * | R12               |    callee save
 * | R9                |    arg5
 * | R8                |    arg4
 * | RSI/R6            |    arg1
 * | RBP/R5            |    callee save
 * | RBX/R3            |    callee save
 * | RDX/R2            |    arg2
 * | RCX/R1            |    arg3
 * | XMM7              |    float arg 8
 * | XMM6              |    float arg 7
 * | XMM5              |    float arg 6
 * | XMM4              |    float arg 5
 * | XMM3              |    float arg 4
 * | XMM2              |    float arg 3
 * | XMM1              |    float arg 2
 * | XMM0              |    float arg 1
 * | RDI/Method*       |    <- sp
 * #-------------------#
 * | Scratch Alloca    |    5K scratch space
 * #---------#---------#
 * |         | sp*     |
 * | Tramp.  #---------#
 * | args    | thread  |
 * | Tramp.  #---------#
 * |         | method  |
 * #-------------------#    <--- SP on artQuickGenericJniTrampoline
 *
 *           |
 *           v              artQuickGenericJniTrampoline
 *
 * #-------------------#
 * | caller method...  |
 * #-------------------#
 * | Return            |
 * | Callee-Save Data  |
 * #-------------------#
 * | handle scope      |
 * #-------------------#
 * | Method*           |    <--- (1)
 * #-------------------#
 * | local ref cookie  |    // 4B
 * | handle scope size |    // 4B   TODO: roll into call stack alignment?
 * #-------------------#
 * | JNI Call Stack    |
 * #-------------------#    <--- SP on native call
 * |                   |
 * | Stack for Regs    |    The trampoline assembly will pop these values
 * |                   |    into registers for native call
 * #-------------------#
 * | Native code ptr   |
 * #-------------------#
 * | Free scratch      |
 * #-------------------#
 * | Ptr to (1)        |    <--- RSP
 * #-------------------#
 */
 /*
  * Called to do a generic JNI down-call
  */
DEFINE_FUNCTION art_quick_generic_jni_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI

    movq %rsp, %rbp                       // save SP at (old) callee-save frame
    CFI_DEF_CFA_REGISTER(rbp)

    //
    // reserve a lot of space
    //
    //      4    local state ref
    //      4    padding
    //   4196    4k scratch space, enough for 2x 256 8-byte parameters (TODO: handle scope overhead?)
    //     16    handle scope member fields ?
    // +  112    14x 8-byte stack-2-register space
    // ------
    //   4332
    // 16-byte aligned: 4336
    //
    // Note: 14x8 = 7*16, so the stack stays aligned for the native call...
    // Also means: the padding is somewhere in the middle
    //
    //
    // New test: use 5K and release
    // 5k = 5120
    subq LITERAL(5120), %rsp
    // prepare for artQuickGenericJniTrampoline call
    // (Thread*, SP)
    //    rdi    rsi       <= C calling convention
    //  gs:...   rbp       <= where they are
    movq %gs:THREAD_SELF_OFFSET, %rdi
    movq %rbp, %rsi
    call SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)

    // The C call will have registered the complete save-frame on success.
    // The result of the call is:
    // %rax: pointer to native code, 0 on error.
    // %rdx: pointer to the bottom of the used area of the alloca, can restore stack till there.

    // Check for error (native code pointer == 0).
    test %rax, %rax
    jz .Lexception_in_native

    // Release part of the alloca.
    movq %rdx, %rsp

    // Pop the GPR arguments prepared by artQuickGenericJniTrampoline from the
    // register-passing alloca region.
    popq %rdi
    popq %rsi
    popq %rdx
    popq %rcx
    popq %r8
    popq %r9
    // TODO: skip floating point if unused, some flag.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    addq LITERAL(64), %rsp                // floating-point done

    // native call
    call *%rax

    // result sign extension is handled in C code
    // prepare for artQuickGenericJniEndTrampoline call
    // (Thread*, result, result_f)
    //    rdi    rsi   rdx       <= C calling convention
    //  gs:...   rax   xmm0      <= where they are
    movq %gs:THREAD_SELF_OFFSET, %rdi
    movq %rax, %rsi
    movq %xmm0, %rdx
    call SYMBOL(artQuickGenericJniEndTrampoline)

    // Pending exceptions possible.
    // TODO: use cmpq, needs direct encoding because of gas bug
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx
    test %rcx, %rcx
    jnz .Lexception_in_native

    // Tear down the alloca.
    movq %rbp, %rsp
    CFI_DEF_CFA_REGISTER(rsp)

    // Tear down the callee-save frame.
    // Load FPRs.
    // XMM0 (at 16(%rsp)) is deliberately not reloaded: it holds the
    // floating-point return value and must not be clobbered.
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    // Skip the ArtMethod* slot plus padding (16B) and the 12 saved FPRs (96B).
    addq LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-80 - 4 * 8)
    // Restore callee-saves and GPR args, mixed together to agree with core spills bitmap.
    POP rcx                               // Arg.
    POP rdx                               // Arg.
    POP rbx                               // Callee save.
    POP rbp                               // Callee save.
    POP rsi                               // Arg.
    POP r8                                // Arg.
    POP r9                                // Arg.
    POP r12                               // Callee save.
    POP r13                               // Callee save.
    POP r14                               // Callee save.
    POP r15                               // Callee save.
    // Mirror the result into XMM0 for the case of a floating-point return.
    movq %rax, %xmm0
    ret
.Lexception_in_native:
    pushq %gs:THREAD_TOP_QUICK_FRAME_OFFSET
    addq LITERAL(-1), (%rsp)              // Remove the GenericJNI tag.
    movq (%rsp), %rsp
    CFI_DEF_CFA_REGISTER(rsp)
    // Do a call to push a new save-all frame required by the runtime.
    call .Lexception_call
.Lexception_call:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_generic_jni_trampoline
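    /*
     * Editorial sketch (not authoritative): end-to-end, the trampoline above
     * behaves roughly like:
     *
     *   void* code = artQuickGenericJniTrampoline(self, sp);  // build handle
     *                                                         // scope + JNI args
     *   if (code == NULL) goto deliver_exception;
     *   // load GPR/FPR argument registers from the prepared alloca region
     *   uint64_t gpr = ((uint64_t (*)())code)();              // native call
     *   uint64_t res = artQuickGenericJniEndTrampoline(self, gpr, fpr);
     *   if (self->exception != NULL) goto deliver_exception;
     *   return res;                                           // in RAX and XMM0
     */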
 /*
  * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
  * of a quick call:
  * RDI = method being called / to bridge to.
  * RSI, RDX, RCX, R8, R9 are arguments to that method.
  */
DEFINE_FUNCTION art_quick_to_interpreter_bridge
    SETUP_SAVE_REFS_AND_ARGS_FRAME        // Set up frame and save arguments.
    movq %gs:THREAD_SELF_OFFSET, %rsi     // RSI := Thread::Current()
    movq %rsp, %rdx                       // RDX := sp
    call SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME      // TODO: no need to restore arguments in this case.
    movq %rax, %xmm0                      // Place return value also into floating point return value.
    RETURN_OR_DELIVER_PENDING_EXCEPTION   // return or deliver exception
END_FUNCTION art_quick_to_interpreter_bridge

 /*
  * Called to catch an attempt to invoke an obsolete method.
  * RDI = method being called.
  */
ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod

 /*
  * Routine that intercepts method calls and returns.
  */
DEFINE_FUNCTION art_quick_instrumentation_entry
#if defined(__APPLE__)
    int3
    int3
#else
    SETUP_SAVE_REFS_AND_ARGS_FRAME

    movq %rdi, %r12                       // Preserve method pointer in a callee-save.

    movq %gs:THREAD_SELF_OFFSET, %rdx     // Pass thread.
    movq %rsp, %rcx                       // Pass SP.

    call SYMBOL(artInstrumentationMethodEntryFromCode)  // (Method*, Object*, Thread*, SP)

    // %rax = result of call.
    testq %rax, %rax
    jz 1f

    movq %r12, %rdi                       // Reload method pointer.
    leaq art_quick_instrumentation_exit(%rip), %r12   // Set up return through instrumentation
    movq %r12, FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp)  // exit.

    RESTORE_SAVE_REFS_AND_ARGS_FRAME

    jmp *%rax                             // Tail call to intended method.
1:
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    DELIVER_PENDING_EXCEPTION
#endif  // __APPLE__
END_FUNCTION art_quick_instrumentation_entry
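// Editorial note: the entry stub redirects the caller's return address slot
// (the top word of the kSaveRefsAndArgs frame) to art_quick_instrumentation_exit,
// so the method-exit listener below runs when the instrumented method returns.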
DEFINE_FUNCTION_CUSTOM_CFA art_quick_instrumentation_exit, 0
    pushq LITERAL(0)                      // Push a fake return PC as there will be none on the stack.
    CFI_ADJUST_CFA_OFFSET(8)

    SETUP_SAVE_EVERYTHING_FRAME

    leaq 16(%rsp), %rcx                   // Pass floating-point result pointer, in kSaveEverything frame.
    leaq 144(%rsp), %rdx                  // Pass integer result pointer, in kSaveEverything frame.
    movq %rsp, %rsi                       // Pass SP.
    movq %gs:THREAD_SELF_OFFSET, %rdi     // Pass Thread.

    call SYMBOL(artInstrumentationMethodExitFromCode)  // (Thread*, SP, gpr_res*, fpr_res*)

    testq %rax, %rax                      // Check if we have a return-pc to go to. If we don't,
                                          // then there was an exception.
    jz .Ldo_deliver_instrumentation_exception
    testq %rdx, %rdx
    jnz .Ldeoptimize
    // Normal return.
    movq %rax, FRAME_SIZE_SAVE_EVERYTHING-8(%rsp)  // Set return pc.
    RESTORE_SAVE_EVERYTHING_FRAME
    ret
.Ldeoptimize:
    movq %rdx, FRAME_SIZE_SAVE_EVERYTHING-8(%rsp)  // Set return pc.
    RESTORE_SAVE_EVERYTHING_FRAME
    // Jump to art_quick_deoptimize.
    jmp SYMBOL(art_quick_deoptimize)
.Ldo_deliver_instrumentation_exception:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END_FUNCTION art_quick_instrumentation_exit

 /*
  * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
  * will long jump to the upcall with a special exception of -1.
  */
DEFINE_FUNCTION art_quick_deoptimize
    SETUP_SAVE_EVERYTHING_FRAME           // Stack should be aligned now.
    movq %gs:THREAD_SELF_OFFSET, %rdi     // Pass Thread.
    call SYMBOL(artDeoptimize)            // (Thread*)
    UNREACHABLE
END_FUNCTION art_quick_deoptimize

 /*
  * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
  * will long jump to the interpreter bridge.
  */
DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
    SETUP_SAVE_EVERYTHING_FRAME
    // Stack should be aligned now.
    movq %gs:THREAD_SELF_OFFSET, %rsi     // Pass Thread.
    call SYMBOL(artDeoptimizeFromCompiledCode)  // (DeoptimizationKind, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_deoptimize_from_compiled_code
 /*
  * String's compareTo.
  *
  * On entry:
  *    rdi:   this string object (known non-null)
  *    rsi:   comp string object (known non-null)
  */
DEFINE_FUNCTION art_quick_string_compareto
    movl MIRROR_STRING_COUNT_OFFSET(%edi), %r8d
    movl MIRROR_STRING_COUNT_OFFSET(%esi), %r9d
    /* Build pointers to the start of string data */
    leal MIRROR_STRING_VALUE_OFFSET(%edi), %edi
    leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
#if (STRING_COMPRESSION_FEATURE)
    /* Branch on the compression state of each string */
    shrl LITERAL(1), %r8d
    jnc .Lstring_compareto_this_is_compressed
    shrl LITERAL(1), %r9d
    jnc .Lstring_compareto_that_is_compressed
    jmp .Lstring_compareto_both_not_compressed
.Lstring_compareto_this_is_compressed:
    shrl LITERAL(1), %r9d
    jnc .Lstring_compareto_both_compressed
    /* Comparison this (8-bit) and that (16-bit) */
    mov %r8d, %eax
    subl %r9d, %eax
    mov %r8d, %ecx
    cmovg %r9d, %ecx
    /* Going into loop to compare each character */
    jecxz .Lstring_compareto_keep_length1 // check loop counter (if 0 then stop)
.Lstring_compareto_loop_comparison_this_compressed:
    movzbl (%edi), %r8d                   // move *(this_cur_char) byte to long
    movzwl (%esi), %r9d                   // move *(that_cur_char) word to long
    addl LITERAL(1), %edi                 // ++this_cur_char (8-bit)
    addl LITERAL(2), %esi                 // ++that_cur_char (16-bit)
    subl %r9d, %r8d
    loope .Lstring_compareto_loop_comparison_this_compressed
    cmovne %r8d, %eax                     // return eax = *(this_cur_char) - *(that_cur_char)
.Lstring_compareto_keep_length1:
    ret
.Lstring_compareto_that_is_compressed:
    movl %r8d, %eax
    subl %r9d, %eax
    mov %r8d, %ecx
    cmovg %r9d, %ecx
    /* Comparison this (16-bit) and that (8-bit) */
    jecxz .Lstring_compareto_keep_length2 // check loop counter (if 0, don't compare)
.Lstring_compareto_loop_comparison_that_compressed:
    movzwl (%edi), %r8d                   // move *(this_cur_char) word to long
    movzbl (%esi), %r9d                   // move *(that_cur_char) byte to long
    addl LITERAL(2), %edi                 // ++this_cur_char (16-bit)
    addl LITERAL(1), %esi                 // ++that_cur_char (8-bit)
    subl %r9d, %r8d
    loope .Lstring_compareto_loop_comparison_that_compressed
    cmovne %r8d, %eax                     // return eax = *(this_cur_char) - *(that_cur_char)
.Lstring_compareto_keep_length2:
    ret
.Lstring_compareto_both_compressed:
    /* Calculate min length and count diff */
    movl %r8d, %ecx
    movl %r8d, %eax
    subl %r9d, %eax
    cmovg %r9d, %ecx
    jecxz .Lstring_compareto_keep_length3
    repe cmpsb
    je .Lstring_compareto_keep_length3
    movzbl -1(%edi), %eax                 // get last compared char from this string (8-bit)
    movzbl -1(%esi), %ecx                 // get last compared char from comp string (8-bit)
    jmp .Lstring_compareto_count_difference
#endif  // STRING_COMPRESSION_FEATURE
.Lstring_compareto_both_not_compressed:
    /* Calculate min length and count diff */
    movl %r8d, %ecx
    movl %r8d, %eax
    subl %r9d, %eax
    cmovg %r9d, %ecx
    /*
     * At this point we have:
     *   eax: value to return if first part of strings are equal
     *   ecx: minimum among the lengths of the two strings
     *   esi: pointer to comp string data
     *   edi: pointer to this string data
     */
    jecxz .Lstring_compareto_keep_length3
    repe cmpsw                            // find nonmatching chars in [%esi] and [%edi], up to length %ecx
    je .Lstring_compareto_keep_length3
    movzwl -2(%edi), %eax                 // get last compared char from this string (16-bit)
    movzwl -2(%esi), %ecx                 // get last compared char from comp string (16-bit)
.Lstring_compareto_count_difference:
    subl %ecx, %eax                       // return the difference
.Lstring_compareto_keep_length3:
    ret
END_FUNCTION art_quick_string_compareto
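    /*
     * Editorial sketch (not authoritative): with string compression, the count
     * field holds the length shifted left by one with a compression flag in
     * the low bit (0 = compressed/8-bit), which is why each count is shifted
     * right once and the carry flag selects the branch. All four paths then
     * implement:
     *
     *   int result = this->length - that->length;
     *   for (int i = 0; i < min(this->length, that->length); ++i) {
     *     if (this->chars[i] != that->chars[i])
     *       return this->chars[i] - that->chars[i];
     *   }
     *   return result;
     */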
UNIMPLEMENTED art_quick_memcmp16

DEFINE_FUNCTION art_quick_instance_of
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                 // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artInstanceOfFromCode)    // (mirror::Object*, mirror::Class*)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_instance_of

// Create a function `name` calling the ReadBarrier::Mark routine,
// getting its argument and returning its result through register
// `reg`, saving and restoring all caller-save registers.
//
// The generated function follows a non-standard runtime calling
// convention:
// - register `reg` (which may be different from RDI) is used to pass
//   the (sole) argument of this function;
// - register `reg` (which may be different from RAX) is used to return
//   the result of this function (instead of RAX);
// - if `reg` is different from `rdi`, RDI is treated like a normal
//   (non-argument) caller-save register;
// - if `reg` is different from `rax`, RAX is treated like a normal
//   (non-result) caller-save register;
// - everything else is the same as in the standard runtime calling
//   convention (e.g. standard callee-save registers are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
    DEFINE_FUNCTION VAR(name)
    // Null check so that we can load the lock word.
    testq REG_VAR(reg), REG_VAR(reg)
    jz .Lret_rb_\name
.Lnot_null_\name:
    // Check the mark bit: if it is 1, the object is already marked, so return.
    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
    jz .Lslow_rb_\name
    ret
.Lslow_rb_\name:
    PUSH rax
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
    addl LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
    // Jump if the addl caused eax to overflow (unsigned). The only lock word
    // state that overflows is the forwarding address one.
    // Taken ~25% of the time.
    jnae .Lret_forwarding_address\name

    // Save all potentially live caller-save core registers.
    movq 0(%rsp), %rax
    PUSH rcx
    PUSH rdx
    PUSH rsi
    PUSH rdi
    PUSH r8
    PUSH r9
    PUSH r10
    PUSH r11
    // Create space for caller-save floating-point registers.
    subq MACRO_LITERAL(12 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(12 * 8)
    // Save all potentially live caller-save floating-point registers.
    movq %xmm0, 0(%rsp)
    movq %xmm1, 8(%rsp)
    movq %xmm2, 16(%rsp)
    movq %xmm3, 24(%rsp)
    movq %xmm4, 32(%rsp)
    movq %xmm5, 40(%rsp)
    movq %xmm6, 48(%rsp)
    movq %xmm7, 56(%rsp)
    movq %xmm8, 64(%rsp)
    movq %xmm9, 72(%rsp)
    movq %xmm10, 80(%rsp)
    movq %xmm11, 88(%rsp)
    SETUP_FP_CALLEE_SAVE_FRAME

    .ifnc RAW_VAR(reg), rdi
    movq REG_VAR(reg), %rdi               // Pass arg1 - obj from `reg`.
    .endif
    call SYMBOL(artReadBarrierMark)       // artReadBarrierMark(obj)
    .ifnc RAW_VAR(reg), rax
    movq %rax, REG_VAR(reg)               // Return result into `reg`.
    .endif

    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore floating-point registers.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    movq 64(%rsp), %xmm8
    movq 72(%rsp), %xmm9
    movq 80(%rsp), %xmm10
    movq 88(%rsp), %xmm11
    // Remove floating-point registers.
    addq MACRO_LITERAL(12 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(12 * 8))
    // Restore core regs, except `reg`, as it is used to return the
    // result of this function (simply remove it from the stack instead).
    POP_REG_NE r11, RAW_VAR(reg)
    POP_REG_NE r10, RAW_VAR(reg)
    POP_REG_NE r9, RAW_VAR(reg)
    POP_REG_NE r8, RAW_VAR(reg)
    POP_REG_NE rdi, RAW_VAR(reg)
    POP_REG_NE rsi, RAW_VAR(reg)
    POP_REG_NE rdx, RAW_VAR(reg)
    POP_REG_NE rcx, RAW_VAR(reg)
    POP_REG_NE rax, RAW_VAR(reg)
.Lret_rb_\name:
    ret
.Lret_forwarding_address\name:
    // The overflow cleared the top bits; what remains is the forwarding address.
    sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
    movq %rax, REG_VAR(reg)
    POP_REG_NE rax, RAW_VAR(reg)
    ret
    END_FUNCTION VAR(name)
END_MACRO
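    /*
     * Editorial sketch (not authoritative): each generated stub implements
     * roughly the following, with `reg` as both argument and result register:
     *
     *   if (obj == NULL) return obj;
     *   if (obj->lock_word & MARK_BIT) return obj;          // already marked
     *   if (lock_word_state(obj) == kForwardingAddress)
     *     return decode_forwarding_address(obj->lock_word); // relocated object
     *   return artReadBarrierMark(obj);                     // runtime slow path
     */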
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx
// Note: There is no art_quick_read_barrier_mark_reg04, as register 4 (RSP)
// cannot be used to pass arguments.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, r12
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, r13
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, r14
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, r15

DEFINE_FUNCTION art_quick_read_barrier_slow
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                 // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierSlow)       // artReadBarrierSlow(ref, obj, offset)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_slow

DEFINE_FUNCTION art_quick_read_barrier_for_root_slow
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                 // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierForRootSlow)  // artReadBarrierForRootSlow(root)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_for_root_slow
 /*
  * On stack replacement stub.
  * On entry:
  *   [sp] = return address
  *   rdi = stack to copy
  *   rsi = size of stack
  *   rdx = pc to call
  *   rcx = JValue* result
  *   r8 = shorty
  *   r9 = thread
  *
  * Note that the native C ABI already aligned the stack to 16 bytes.
  */
DEFINE_FUNCTION art_quick_osr_stub
    // Save the non-volatiles.
    PUSH rbp                              // Save rbp.
    PUSH rcx                              // Save rcx/result*.
    PUSH r8                               // Save r8/shorty*.

    // Save callee saves.
    PUSH rbx
    PUSH r12
    PUSH r13
    PUSH r14
    PUSH r15

    pushq LITERAL(0)                      // Push null for ArtMethod*.
    CFI_ADJUST_CFA_OFFSET(8)
    movl %esi, %ecx                       // rcx := size of stack
    movq %rdi, %rsi                       // rsi := stack to copy
    movq %rsp, %rbp                       // Save stack pointer to RBP for CFI use in .Losr_entry.
    call .Losr_entry
    CFI_REMEMBER_STATE

    // Restore stack and callee-saves.
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    POP r15
    POP r14
    POP r13
    POP r12
    POP rbx
    POP r8
    POP rcx
    POP rbp
    cmpb LITERAL(68), (%r8)               // Test if result type char == 'D'.
    je .Losr_return_double_quick
    cmpb LITERAL(70), (%r8)               // Test if result type char == 'F'.
    je .Losr_return_float_quick
    movq %rax, (%rcx)                     // Store the result, assuming it's a long, int or Object*.
    ret
.Losr_return_double_quick:
    movsd %xmm0, (%rcx)                   // Store the double floating point result.
    ret
.Losr_return_float_quick:
    movss %xmm0, (%rcx)                   // Store the floating point result.
    ret
.Losr_entry:
    CFI_RESTORE_STATE                     // Restore CFI state; however, since the call has pushed the
    CFI_DEF_CFA_REGISTER(rbp)             // return address we need to switch the CFA register to RBP.

    subl LITERAL(8), %ecx                 // The given stack size includes the pushed frame pointer, so subtract it.
    subq %rcx, %rsp
    movq %rsp, %rdi                       // rdi := beginning of stack
    rep movsb                             // while (rcx--) { *rdi++ = *rsi++ }
    jmp *%rdx
END_FUNCTION art_quick_osr_stub

DEFINE_FUNCTION art_quick_invoke_polymorphic
    // On entry: RDI := unused, RSI := receiver
    SETUP_SAVE_REFS_AND_ARGS_FRAME        // save callee saves
    movq %rsi, %rdi                       // RDI := receiver
    movq %gs:THREAD_SELF_OFFSET, %rsi     // RSI := Thread (self)
    movq %rsp, %rdx                       // RDX := pass SP
    call SYMBOL(artInvokePolymorphic)     // invoke with (receiver, self, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                      // Result is in RAX. Copy to FP result register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_invoke_polymorphic

DEFINE_FUNCTION art_quick_invoke_custom
    SETUP_SAVE_REFS_AND_ARGS_FRAME        // save callee saves
    // RDI := call_site_index
    movq %gs:THREAD_SELF_OFFSET, %rsi     // RSI := Thread::Current()
    movq %rsp, %rdx                       // RDX := SP
    call SYMBOL(artInvokeCustom)          // artInvokeCustom(call_site_index, Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                      // Result is in RAX. Copy to FP result register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_invoke_custom

// Wrap ExecuteSwitchImpl in an assembly method which specifies the DEX PC for unwinding.
// Argument 0: RDI: The context pointer for ExecuteSwitchImpl.
// Argument 1: RSI: Pointer to the templated ExecuteSwitchImpl to call.
// Argument 2: RDX: The value of DEX PC (memory address of the method's bytecode).
DEFINE_FUNCTION ExecuteSwitchImplAsm
    PUSH rbx                              // Spill RBX
    movq %rdx, %rbx                       // RBX = DEX PC (callee save register)
    CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* RAX */, 3 /* RBX */, 0)

    call *%rsi                            // Call the wrapped function

    POP rbx                               // Restore RBX
    ret
END_FUNCTION ExecuteSwitchImplAsm
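// Editorial note: RBX is used because it is callee-save in the native ABI, so
// the DEX PC stored there stays live across the ExecuteSwitchImpl call, and
// CFI_DEFINE_DEX_PC_WITH_OFFSET lets the unwinder recover the interpreter's
// current DEX PC from it when walking through this frame.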