/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_x86_64.S"

#include "arch/quick_alloc_entrypoints.S"

MACRO0(ASSERT_USE_READ_BARRIER)
#if !defined(USE_READ_BARRIER)
    int3
    int3
#endif
END_MACRO

MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
    // Create space for ART FP callee-saved registers
    subq MACRO_LITERAL(4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(4 * 8)
    movq %xmm12, 0(%rsp)
    movq %xmm13, 8(%rsp)
    movq %xmm14, 16(%rsp)
    movq %xmm15, 24(%rsp)
END_MACRO

MACRO0(RESTORE_FP_CALLEE_SAVE_FRAME)
    // Restore ART FP callee-saved registers
    movq 0(%rsp), %xmm12
    movq 8(%rsp), %xmm13
    movq 16(%rsp), %xmm14
    movq 24(%rsp), %xmm15
    addq MACRO_LITERAL(4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(- 4 * 8)
END_MACRO
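
// Illustrative layout of the 32-byte save area built by SETUP_FP_CALLEE_SAVE_FRAME
// (derived from the stores above; offsets are relative to the new rsp):
//   +24: xmm15
//   +16: xmm14
//   + 8: xmm13
//   + 0: xmm12
// RESTORE_FP_CALLEE_SAVE_FRAME undoes this exactly, so the two macros must always
// be paired on the same path to keep the CFA adjustments balanced.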

// For x86-64, the CFA is rsp+8, the address above the pushed return address on the stack.

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
     */
MACRO0(SETUP_SAVE_ALL_CALLEE_SAVES_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee save registers to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    // Create space for FPR args, plus space for ArtMethod*.
    subq MACRO_LITERAL(4 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(4 * 8 + 8)
    // Save FPRs.
    movq %xmm12, 8(%rsp)
    movq %xmm13, 16(%rsp)
    movq %xmm14, 24(%rsp)
    movq %xmm15, 32(%rsp)
    // R10 := ArtMethod* for save all callee save frame method.
    movq RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 6 * 8 + 4 * 8 + 8 + 8)
#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
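
// Illustrative frame built by SETUP_SAVE_ALL_CALLEE_SAVES_FRAME (derived from the
// pushes and stores above; offsets relative to the new rsp):
//   +88: return address (pushed by the caller's call)
//   +80: r15   +72: r14   +64: r13   +56: r12   +48: rbp   +40: rbx
//   +32: xmm15 +24: xmm14 +16: xmm13 + 8: xmm12
//   + 0: ArtMethod* (kSaveAllCalleeSaves runtime method)
// Total: 6*8 + 4*8 + 8 + 8 = 96 bytes, matching the FRAME_SIZE check above.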

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly)
     */
MACRO0(SETUP_SAVE_REFS_ONLY_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee save registers to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    // Create space for FPR args, plus space for ArtMethod*.
    subq LITERAL(8 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(8 + 4 * 8)
    // Save FPRs.
    movq %xmm12, 8(%rsp)
    movq %xmm13, 16(%rsp)
    movq %xmm14, 24(%rsp)
    movq %xmm15, 32(%rsp)
    // R10 := ArtMethod* for refs only callee save frame method.
    movq RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_REFS_ONLY != 6 * 8 + 4 * 8 + 8 + 8)
#error "FRAME_SIZE_SAVE_REFS_ONLY(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO

MACRO0(RESTORE_SAVE_REFS_ONLY_FRAME)
    movq 8(%rsp), %xmm12
    movq 16(%rsp), %xmm13
    movq 24(%rsp), %xmm14
    movq 32(%rsp), %xmm15
    addq LITERAL(8 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8 - 4*8)
    // TODO: optimize by not restoring callee-saves restored by the ABI
    POP rbx
    POP rbp
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs)
     */
MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots for ArtMethod*.
    subq MACRO_LITERAL(16 + 12 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(16 + 12 * 8)
    // R10 := ArtMethod* for ref and args callee save frame method.
    movq RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(%r10), %r10
    // Save FPRs.
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
    movq %xmm2, 32(%rsp)
    movq %xmm3, 40(%rsp)
    movq %xmm4, 48(%rsp)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
    movq %xmm12, 80(%rsp)
    movq %xmm13, 88(%rsp)
    movq %xmm14, 96(%rsp)
    movq %xmm15, 104(%rsp)
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 11 * 8 + 12 * 8 + 16 + 8)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
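
// Illustrative frame built by SETUP_SAVE_REFS_AND_ARGS_FRAME (derived from the
// code above; offsets relative to the new rsp):
//   +200: return address
//   +192: r15 ... +112: rcx       (11 GPRs, push order r15 down to rcx)
//   +104: xmm15 ... +16: xmm0     (12 FPRs: arg regs xmm0-xmm7, callee saves xmm12-xmm15)
//   +  8: padding (second ArtMethod* slot)
//   +  0: ArtMethod* (kSaveRefsAndArgs runtime method)
// Total: 11*8 + 12*8 + 16 + 8 = 208 bytes, matching the FRAME_SIZE check above.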

MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI)
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots for ArtMethod*.
    subq LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
    // Save FPRs.
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
    movq %xmm2, 32(%rsp)
    movq %xmm3, 40(%rsp)
    movq %xmm4, 48(%rsp)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
    movq %xmm12, 80(%rsp)
    movq %xmm13, 88(%rsp)
    movq %xmm14, 96(%rsp)
    movq %xmm15, 104(%rsp)
    // Store ArtMethod* to bottom of stack.
    movq %rdi, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
END_MACRO

MACRO0(RESTORE_SAVE_REFS_AND_ARGS_FRAME)
    // Restore FPRs.
    movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    addq MACRO_LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8))
    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
    POP rcx
    POP rdx
    POP rbx
    POP rbp
    POP rsi
    POP r8
    POP r9
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when R14 and R15 are already saved.
     */
MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED)
#if defined(__APPLE__)
    int3
    int3
#else
    // Save core registers from highest to lowest to agree with core spills bitmap.
    // R14 and R15, or at least placeholders for them, are already on the stack.
    PUSH r13
    PUSH r12
    PUSH r11
    PUSH r10
    PUSH r9
    PUSH r8
    PUSH rdi
    PUSH rsi
    PUSH rbp
    PUSH rbx
    PUSH rdx
    PUSH rcx
    PUSH rax
    // Create space for FPRs and stack alignment padding.
    subq MACRO_LITERAL(8 + 16 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(8 + 16 * 8)
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save FPRs.
    movq %xmm0, 8(%rsp)
    movq %xmm1, 16(%rsp)
    movq %xmm2, 24(%rsp)
    movq %xmm3, 32(%rsp)
    movq %xmm4, 40(%rsp)
    movq %xmm5, 48(%rsp)
    movq %xmm6, 56(%rsp)
    movq %xmm7, 64(%rsp)
    movq %xmm8, 72(%rsp)
    movq %xmm9, 80(%rsp)
    movq %xmm10, 88(%rsp)
    movq %xmm11, 96(%rsp)
    movq %xmm12, 104(%rsp)
    movq %xmm13, 112(%rsp)
    movq %xmm14, 120(%rsp)
    movq %xmm15, 128(%rsp)
    // Push ArtMethod* for save everything frame method.
    pushq RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET(%r10)
    CFI_ADJUST_CFA_OFFSET(8)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_EVERYTHING != 15 * 8 + 16 * 8 + 16 + 8)
#error "FRAME_SIZE_SAVE_EVERYTHING(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
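
// Illustrative save-everything frame (derived from the code above; offsets
// relative to the new rsp, once the ArtMethod* push has shifted the FPR area):
//   +264: return address
//   +256: r15 ... +144: rax       (15 GPRs, everything except rsp)
//   +136: xmm15 ... +16: xmm0     (all 16 FPRs)
//   +  8: stack alignment padding
//   +  0: ArtMethod* (kSaveEverything runtime method)
// Total: 15*8 + 16*8 + 16 + 8 = 272 bytes, matching the FRAME_SIZE check above.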

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when R15 is already saved.
     */
MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED)
    PUSH r14
    SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     */
MACRO0(SETUP_SAVE_EVERYTHING_FRAME)
    PUSH r15
    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FPRS)
    // Restore FPRs. Method and padding are still on the stack.
    movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm8
    movq 88(%rsp), %xmm9
    movq 96(%rsp), %xmm10
    movq 104(%rsp), %xmm11
    movq 112(%rsp), %xmm12
    movq 120(%rsp), %xmm13
    movq 128(%rsp), %xmm14
    movq 136(%rsp), %xmm15
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX)
    // Restore callee and GPR args (except RAX), mixed together to agree with core spills bitmap.
    POP rcx
    POP rdx
    POP rbx
    POP rbp
    POP rsi
    POP rdi
    POP r8
    POP r9
    POP r10
    POP r11
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
    RESTORE_SAVE_EVERYTHING_FRAME_FPRS

    // Remove save everything callee save method, stack alignment padding and FPRs.
    addq MACRO_LITERAL(16 + 16 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8))

    POP rax
    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX)
    RESTORE_SAVE_EVERYTHING_FRAME_FPRS

    // Remove save everything callee save method, stack alignment padding and FPRs, skip RAX.
    addq MACRO_LITERAL(16 + 16 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8 + 8))

    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
     */
MACRO0(DELIVER_PENDING_EXCEPTION_FRAME_READY)
    // (Thread*) setup
    movq %gs:THREAD_SELF_OFFSET, %rdi
    call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*)
    UNREACHABLE
END_MACRO

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_.
     */
MACRO0(DELIVER_PENDING_EXCEPTION)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME        // save callee saves for throw
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END_MACRO

MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg1, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg1, arg2, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
DEFINE_FUNCTION_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, 2 * __SIZEOF_POINTER__
    // Fault address and return address were saved by the fault handler.
    // Save all registers as basis for long jump context; R15 will replace fault address later.
    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
    // Retrieve fault address and save R15.
    movq (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp), %rdi
    movq %r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp)
    CFI_REL_OFFSET(%r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__))
    // Outgoing argument set up; RDI already contains the fault address.
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call SYMBOL(artThrowNullPointerExceptionFromSignal)  // (addr, self)
    UNREACHABLE
END_FUNCTION art_quick_throw_null_pointer_exception_from_signal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * Called by managed code, saves callee saves and then calls artDeliverExceptionFromCode,
     * which will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/rsi.
     *
     * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting
     * of the target Method* in rax and method->code_ in rdx.
     *
     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the return
     * location on the stack.
     *
     * Adapted from x86 code.
     */
MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
    SETUP_SAVE_REFS_AND_ARGS_FRAME  // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, this_object, Thread*, SP)

    movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread
    movq %rsp, %rcx                                        // pass SP

    call CALLVAR(cxx_name)                                 // cxx_name(arg1, arg2, Thread*, SP)
                                                           // save the code pointer
    movq %rax, %rdi
    movq %rdx, %rax
    RESTORE_SAVE_REFS_AND_ARGS_FRAME

    testq %rdi, %rdi
    jz 1f

    // Tail call to intended method.
    jmp *%rax
1:
    DELIVER_PENDING_EXCEPTION
END_MACRO
MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    INVOKE_TRAMPOLINE_BODY RAW_VAR(cxx_name)
    END_FUNCTION VAR(c_name)
END_MACRO

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck


    /*
     * Helper for quick invocation stub to set up XMM registers. Assumes r10 == shorty,
     * r11 == arg_array. Clobbers r10, r11 and al. Branches to xmm_setup_finished if it encounters
     * the end of the shorty.
     */
MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto xmm_setup_finished
    je VAR(finished)
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto FOUND_DOUBLE
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto FOUND_FLOAT
    je 3f
    addq MACRO_LITERAL(4), %r11   // arg_array++
    //  Handle extra space in arg array taken by a long.
    cmpb MACRO_LITERAL(74), %al   // if (al != 'J') goto LOOP
    jne 1b
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b                        // goto LOOP
2:  // FOUND_DOUBLE
    movsd (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 4f
3:  // FOUND_FLOAT
    movss (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(4), %r11   // arg_array++
4:
END_MACRO
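
// Illustrative walk-through with a hypothetical shorty "VDJF" (void return, then
// a double, a long and a float argument), after the stub has skipped the return
// type character: the xmm0 instance of this loop sees 'D' and loads a double into
// xmm0 (arg_array advances 8 bytes); the xmm1 instance skips 'J' (advancing 8
// bytes without loading any XMM register) and then loads the 'F' float into xmm1;
// the xmm2 instance hits '\0' and branches to the finished label.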

    /*
     * Helper for quick invocation stub to set up GPR registers. Assumes r10 == shorty,
     * r11 == arg_array. Clobbers r10, r11 and al. Branches to gpr_setup_finished if it encounters
     * the end of the shorty.
     */
MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto gpr_setup_finished
    je  VAR(finished)
    cmpb MACRO_LITERAL(74), %al   // if (al == 'J') goto FOUND_LONG
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto SKIP_FLOAT
    je 3f
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto SKIP_DOUBLE
    je 4f
    movl (%r11), REG_VAR(gpr_reg32)
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 5f
2:  // FOUND_LONG
    movq (%r11), REG_VAR(gpr_reg64)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 5f
3:  // SKIP_FLOAT
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b
4:  // SKIP_DOUBLE
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 1b
5:
END_MACRO
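
// Illustrative walk-through with the same hypothetical shorty "VDJF": the first
// GPR instance skips 'D' (8 bytes, no GPR consumed), loads the 'J' long into its
// 64-bit register and jumps to 5f; the next instance then skips 'F' before
// hitting '\0'. Float and double values never consume a GPR here, mirroring how
// the XMM loop above never consumes an XMM register for 'J' or integral types.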

    /*
     * Quick invocation stub.
     * On entry:
     *   [sp] = return address
     *   rdi = method pointer
     *   rsi = argument array that must at least contain the this pointer.
     *   rdx = size of argument array in bytes
     *   rcx = (managed) thread pointer
     *   r8 = JValue* result
     *   r9 = char* shorty
     */
DEFINE_FUNCTION art_quick_invoke_stub
#if defined(__APPLE__)
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; i.e. skip return type character.
    leaq 4(%rsi), %r11            // R11 := arg_array + 4 ; i.e. skip this pointer.
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished
    .balign 16
.Lxmm_setup_finished:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    PUSH rbx                      // Save native callee save rbx
    PUSH r12                      // Save native callee save r12
    PUSH r13                      // Save native callee save r13
    PUSH r14                      // Save native callee save r14
    PUSH r15                      // Save native callee save r15
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d
    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
                                   // r13, r14, and r15
    subq %rdx, %rsp                // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)       // Store null for method*

    movl %r10d, %ecx              // Place size of args in rcx.
    movq %rdi, %rax               // rax := method to be called
    movq %rsi, %r11               // r11 := arg_array
    leaq 8(%rsp), %rdi            // rdi is pointing just above the ArtMethod* in the stack
                                  // arguments.
    // Copy arg array into stack.
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10             // r10 := shorty + 1  ; i.e. skip return type character
    movq %rax, %rdi               // rdi := method to be called
    movl (%r11), %esi             // rsi := this pointer
    addq LITERAL(4), %r11         // arg_array++
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
.Lgpr_setup_finished:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    movq %rbp, %rsp               // Restore stack pointer.
    POP r15                       // Pop r15
    POP r14                       // Pop r14
    POP r13                       // Pop r13
    POP r12                       // Pop r12
    POP rbx                       // Pop rbx
    POP r9                        // Pop r9 - shorty*
    POP r8                        // Pop r8 - result*.
    POP rbp                       // Pop rbp
    cmpb LITERAL(68), (%r9)       // Test if result type char == 'D'.
    je .Lreturn_double_quick
    cmpb LITERAL(70), (%r9)       // Test if result type char == 'F'.
    je .Lreturn_float_quick
    movq %rax, (%r8)              // Store the result assuming it's a long, int or Object*
    ret
.Lreturn_double_quick:
    movsd %xmm0, (%r8)            // Store the double floating point result.
    ret
.Lreturn_float_quick:
    movss %xmm0, (%r8)            // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_stub
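
    /*
     * For reference, a sketch of the C++ declaration this stub is expected to
     * match, based on the register assignments documented above (the exact
     * declaration lives in the runtime sources):
     *
     *   extern "C" void art_quick_invoke_stub(ArtMethod* method, uint32_t* args,
     *                                         uint32_t args_size, Thread* self,
     *                                         JValue* result, const char* shorty);
     */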

    /*
     * Quick invocation stub.
     * On entry:
     *   [sp] = return address
     *   rdi = method pointer
     *   rsi = argument array or null if no arguments.
     *   rdx = size of argument array in bytes
     *   rcx = (managed) thread pointer
     *   r8 = JValue* result
     *   r9 = char* shorty
     */
DEFINE_FUNCTION art_quick_invoke_static_stub
#if defined(__APPLE__)
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; i.e. skip return type character
    movq %rsi, %r11               // R11 := arg_array
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished2
    .balign 16
.Lxmm_setup_finished2:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    PUSH rbx                      // Save rbx
    PUSH r12                      // Save r12
    PUSH r13                      // Save r13
    PUSH r14                      // Save r14
    PUSH r15                      // Save r15
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d
    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
                                   // r13, r14, and r15.
    subq %rdx, %rsp                // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)        // Store null for method*

    movl %r10d, %ecx               // Place size of args in rcx.
    movq %rdi, %rax                // rax := method to be called
    movq %rsi, %r11                // r11 := arg_array
    leaq 8(%rsp), %rdi             // rdi is pointing just above the ArtMethod* in the
                                   // stack arguments.
    // Copy arg array into stack.
    rep movsb                      // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10              // r10 := shorty + 1  ; i.e. skip return type character
    movq %rax, %rdi                // rdi := method to be called
    LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
.Lgpr_setup_finished2:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    movq %rbp, %rsp                // Restore stack pointer.
    POP r15                        // Pop r15
    POP r14                        // Pop r14
    POP r13                        // Pop r13
    POP r12                        // Pop r12
    POP rbx                        // Pop rbx
    POP r9                         // Pop r9 - shorty*.
    POP r8                         // Pop r8 - result*.
    POP rbp                        // Pop rbp
    cmpb LITERAL(68), (%r9)        // Test if result type char == 'D'.
    je .Lreturn_double_quick2
    cmpb LITERAL(70), (%r9)        // Test if result type char == 'F'.
    je .Lreturn_float_quick2
    movq %rax, (%r8)               // Store the result assuming it's a long, int or Object*
    ret
.Lreturn_double_quick2:
    movsd %xmm0, (%r8)             // Store the double floating point result.
    ret
.Lreturn_float_quick2:
    movss %xmm0, (%r8)             // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_static_stub

    /*
     * Long jump stub.
     * On entry:
     *   rdi = gprs
     *   rsi = fprs
     */
DEFINE_FUNCTION art_quick_do_long_jump
#if defined(__APPLE__)
    int3
    int3
#else
    // Restore FPRs.
    movq 0(%rsi), %xmm0
    movq 8(%rsi), %xmm1
    movq 16(%rsi), %xmm2
    movq 24(%rsi), %xmm3
    movq 32(%rsi), %xmm4
    movq 40(%rsi), %xmm5
    movq 48(%rsi), %xmm6
    movq 56(%rsi), %xmm7
    movq 64(%rsi), %xmm8
    movq 72(%rsi), %xmm9
    movq 80(%rsi), %xmm10
    movq 88(%rsi), %xmm11
    movq 96(%rsi), %xmm12
    movq 104(%rsi), %xmm13
    movq 112(%rsi), %xmm14
    movq 120(%rsi), %xmm15
    // Restore GPRs.
    movq %rdi, %rsp   // RSP points to gprs.
    // Load all registers except RSP and RIP with values in gprs.
    popq %r15
    popq %r14
    popq %r13
    popq %r12
    popq %r11
    popq %r10
    popq %r9
    popq %r8
    popq %rdi
    popq %rsi
    popq %rbp
    addq LITERAL(8), %rsp   // Skip rsp
    popq %rbx
    popq %rdx
    popq %rcx
    popq %rax
    popq %rsp      // Load stack pointer.
    ret            // Pop rip from the top of the new stack and jump to it.
#endif  // __APPLE__
END_FUNCTION art_quick_do_long_jump
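
    /*
     * Illustrative gprs array layout implied by the pop sequence above (lowest
     * address first): r15, r14, r13, r12, r11, r10, r9, r8, rdi, rsi, rbp,
     * <rsp slot, skipped>, rbx, rdx, rcx, rax, then the slot loaded into rsp.
     * The final ret expects the new rip to sit at the top of that target stack,
     * so the jump target is popped from the stack being switched to.
     */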

MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi    // pass Thread::Current()
    call CALLVAR(cxx_name)               // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
    CALL_MACRO(return_macro)             // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
    call CALLVAR(cxx_name)               // cxx_name(arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
    CALL_MACRO(return_macro)             // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, arg3, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
                                        // arg0 is in rdi
    movq %gs:THREAD_SELF_OFFSET, %rsi   // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
                                        // arg0 and arg1 are in rdi/rsi
    movq %gs:THREAD_SELF_OFFSET, %rdx   // pass Thread::Current()
    call CALLVAR(cxx_name)              // (arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
                                        // arg0, arg1, and arg2 are in rdi/rsi/rdx
    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

// Macro for string and type resolution and initialization.
MACRO2(ONE_ARG_SAVE_EVERYTHING_DOWNCALL, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME                   // save everything for GC
    // Outgoing argument set up
    movl %eax, %edi                               // pass string/type index
    movq %gs:THREAD_SELF_OFFSET, %rsi             // pass Thread::Current()
    call CALLVAR(cxx_name)                        // cxx_name(arg0, Thread*)
    testl %eax, %eax                              // If result is null, deliver the OOME.
    jz 1f
    CFI_REMEMBER_STATE
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX        // restore frame up to return address
    ret
    CFI_RESTORE_STATE
    CFI_DEF_CFA(rsp, FRAME_SIZE_SAVE_EVERYTHING)  // workaround for clang bug: 31975598
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO0(RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER)
    testq %rax, %rax               // rax == 0 ?
    jz  1f                         // if rax == 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO

MACRO0(RETURN_IF_EAX_ZERO)
    testl %eax, %eax               // eax == 0 ?
    jnz  1f                        // if eax != 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO

MACRO0(RETURN_OR_DELIVER_PENDING_EXCEPTION)
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx // get exception field
    testq %rcx, %rcx               // rcx == 0 ?
    jnz 1f                         // if rcx != 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO

// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS

// Comment out allocators that have x86_64 specific asm.
// Region TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
// Normal TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)


// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
MACRO2(ART_QUICK_ALLOC_OBJECT_ROSALLOC, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    // Fast path rosalloc allocation.
    // RDI: mirror::Class*, RAX: return value
    // RSI, RDX, RCX, R8, R9: free.
                                                           // Check if the thread local
                                                           // allocation stack has room.
    movq   %gs:THREAD_SELF_OFFSET, %r8                     // r8 = thread
    movq   THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx  // rcx = alloc stack top.
    cmpq   THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
    jae    .Lslow_path\c_name
                                                           // Load the object size
    movl   MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %eax
                                                           // Check if the size is for a thread
                                                           // local allocation. Also does the
                                                           // initialized and finalizable checks.
    cmpl   LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
    ja     .Lslow_path\c_name
                                                           // Compute the rosalloc bracket index
                                                           // from the size.
    shrq   LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
                                                           // Load the rosalloc run (r9)
                                                           // Subtract __SIZEOF_POINTER__ to
                                                           // subtract one from rax as there is no
                                                           // 0 byte run and the size is already
                                                           // aligned.
    movq   (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%r8, %rax, __SIZEOF_POINTER__), %r9
                                                           // Load the free list head (rax). This
                                                           // will be the return val.
    movq   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
    testq  %rax, %rax
    jz     .Lslow_path\c_name
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
                                                           // Push the new object onto the thread
                                                           // local allocation stack and
                                                           // increment the thread local
                                                           // allocation stack top.
    movl   %eax, (%rcx)
    addq   LITERAL(COMPRESSED_REFERENCE_SIZE), %rcx
    movq   %rcx, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8)
                                                           // Load the next pointer of the head
                                                           // and update the list head with the
                                                           // next pointer.
    movq   ROSALLOC_SLOT_NEXT_OFFSET(%rax), %rcx
    movq   %rcx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9)
                                                           // Store the class pointer in the
                                                           // header. This also overwrites the
                                                           // next pointer. The offsets are
                                                           // asserted to match.
#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF edi
    movl   %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
                                                           // Decrement the size of the free list
    decl   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
                                                           // No fence necessary for x86.
    ret
.Lslow_path\c_name:
    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi                      // pass Thread::Current()
    call CALLVAR(cxx_name)                                 // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
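
// Illustrative bracket-index computation (assuming a 16-byte rosalloc quantum,
// i.e. ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT == 4): a pre-aligned object size of
// 32 bytes yields 32 >> 4 = 2, and the -__SIZEOF_POINTER__ bias in the indexed
// load above turns that into runs[1], since there is no run for 0-byte objects.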

ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc

// The common fast path code for art_quick_alloc_object_resolved_region_tlab.
// TODO: delete ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH since it is the same as
// ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH.
//
// RDI: the class, RAX: return value.
// RCX, RSI, RDX: scratch, r8: Thread::Current().
MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel)
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel))
END_MACRO

// The fast path code for art_quick_alloc_object_initialized_region_tlab.
//
// RDI: the class, RAX: return value.
// RCX, RSI, RDX: scratch, r8: Thread::Current().
MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel)
    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %ecx // Load the object size.
    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax
    addq %rax, %rcx                                            // Add size to pos, note that these
                                                               // are both 32 bit ints, overflow
                                                               // will cause the add to be past the
                                                               // end of the thread local region.
    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits.
    ja   RAW_VAR(slowPathLabel)
    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
    incq THREAD_LOCAL_OBJECTS_OFFSET(%r8)                      // Increase thread_local_objects.
                                                               // Store the class pointer in the
                                                               // header.
                                                               // No fence needed for x86.
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    ret                                                        // Fast path succeeded.
END_MACRO
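
// Illustrative bump-pointer step with hypothetical numbers: with
// thread_local_pos = 0x1000, thread_local_end = 0x2000 and an object size of
// 24, rax becomes 0x1000 (the newly allocated object), rcx becomes 0x1018,
// the ja is not taken because 0x1018 <= 0x2000, and thread_local_pos is
// advanced to 0x1018 for the next allocation.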

// The fast path code for art_quick_alloc_array_region_tlab.
// Inputs: RDI: the class, RSI: int32_t component_count, R9: total_size
// Free temps: RCX, RDX, R8
// Output: RAX: return value.
MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE, slowPathLabel)
    movq %gs:THREAD_SELF_OFFSET, %rcx                          // rcx = thread
    // Mask out the unaligned part to make sure we are 8 byte aligned.
    andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %r9
    movq THREAD_LOCAL_POS_OFFSET(%rcx), %rax
    addq %rax, %r9
    cmpq THREAD_LOCAL_END_OFFSET(%rcx), %r9                    // Check if it fits.
    ja   RAW_VAR(slowPathLabel)
    movq %r9, THREAD_LOCAL_POS_OFFSET(%rcx)                    // Update thread_local_pos.
    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%rcx)         // Increase thread_local_objects.
                                                               // Store the class pointer in the
                                                               // header.
                                                               // No fence needed for x86.
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    movl %esi, MIRROR_ARRAY_LENGTH_OFFSET(%rax)
    ret                                                        // Fast path succeeded.
END_MACRO

// The common slow path code for art_quick_alloc_object_{resolved, initialized}_tlab
// and art_quick_alloc_object_{resolved, initialized}_region_tlab.
MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi                      // pass Thread::Current()
    call CALLVAR(cxx_name)                                 // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
END_MACRO

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be
// called with CC if the GC is not active.
DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
.Lart_quick_alloc_object_resolved_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB
END_FUNCTION art_quick_alloc_object_resolved_tlab

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB).
// May be called with CC if the GC is not active.
DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path
.Lart_quick_alloc_object_initialized_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB
END_FUNCTION art_quick_alloc_object_initialized_tlab

MACRO0(COMPUTE_ARRAY_SIZE_UNKNOWN)
    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rdi), %ecx        // Load component type.
    UNPOISON_HEAP_REF ecx
    movl MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%rcx), %ecx // Load primitive type.
    shrq MACRO_LITERAL(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT), %rcx  // Get component size shift.
    movq %rsi, %r9
    salq %cl, %r9                                              // Calculate array count shifted.
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
    // Add 4 extra bytes if we are doing a long array.
    addq MACRO_LITERAL(1), %rcx
    andq MACRO_LITERAL(4), %rcx
#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
#error Long array data offset must be 4 greater than int array data offset.
#endif
    addq %rcx, %r9
END_MACRO
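
// Illustrative size computation for an int[3] (assuming
// MIRROR_INT_ARRAY_DATA_OFFSET == 12 and OBJECT_ALIGNMENT_MASK == 7): the
// component size shift is 2, so r9 = (3 << 2) + 12 + 7 = 31; the long-array
// adjustment adds 0 because (2 + 1) & 4 == 0; the allocation fast path then
// masks r9 down to 24, i.e. the 8-byte-aligned total size.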
1205
1206MACRO0(COMPUTE_ARRAY_SIZE_8)
1207    // RDI: mirror::Class* klass, RSI: int32_t component_count
1208    // RDX, RCX, R8, R9: free. RAX: return val.
1209    movq %rsi, %r9
1210    // Add array header + alignment rounding.
1211    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
1212END_MACRO
1213
1214MACRO0(COMPUTE_ARRAY_SIZE_16)
1215    // RDI: mirror::Class* klass, RSI: int32_t component_count
1216    // RDX, RCX, R8, R9: free. RAX: return val.
1217    movq %rsi, %r9
1218    salq MACRO_LITERAL(1), %r9
1219    // Add array header + alignment rounding.
1220    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
1221END_MACRO
1222
1223MACRO0(COMPUTE_ARRAY_SIZE_32)
1224    // RDI: mirror::Class* klass, RSI: int32_t component_count
1225    // RDX, RCX, R8, R9: free. RAX: return val.
1226    movq %rsi, %r9
1227    salq MACRO_LITERAL(2), %r9
1228    // Add array header + alignment rounding.
1229    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
1230END_MACRO
1231
1232MACRO0(COMPUTE_ARRAY_SIZE_64)
1233    // RDI: mirror::Class* klass, RSI: int32_t component_count
1234    // RDX, RCX, R8, R9: free. RAX: return val.
1235    movq %rsi, %r9
1236    salq MACRO_LITERAL(3), %r9
1237    // Add array header + alignment rounding.
1238    addq MACRO_LITERAL(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
1239END_MACRO
1240
1241MACRO3(GENERATE_ALLOC_ARRAY_TLAB, c_entrypoint, cxx_name, size_setup)
1242    DEFINE_FUNCTION VAR(c_entrypoint)
1243    // RDI: mirror::Class* klass, RSI: int32_t component_count
1244    // RDX, RCX, R8, R9: free. RAX: return val.
1245    CALL_MACRO(size_setup)
1246    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\c_entrypoint
1247.Lslow_path\c_entrypoint:
1248    SETUP_SAVE_REFS_ONLY_FRAME                                 // save ref containing registers for GC
1249    // Outgoing argument set up
1250    movq %gs:THREAD_SELF_OFFSET, %rdx                          // pass Thread::Current()
1251    call CALLVAR(cxx_name)                                     // cxx_name(arg0, arg1, Thread*)
1252    RESTORE_SAVE_REFS_ONLY_FRAME                               // restore frame up to return address
1253    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
1254    END_FUNCTION VAR(c_entrypoint)
1255END_MACRO


GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64

GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB).
DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab
    // Fast path region tlab allocation.
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ASSERT_USE_READ_BARRIER
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
.Lart_quick_alloc_object_resolved_region_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB
END_FUNCTION art_quick_alloc_object_resolved_region_tlab

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB).
DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab
    // Fast path region tlab allocation.
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ASSERT_USE_READ_BARRIER
    // No read barrier since the caller is responsible for that.
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
.Lart_quick_alloc_object_initialized_region_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB
END_FUNCTION art_quick_alloc_object_initialized_region_tlab

ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode

TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO

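    /*
     * art_quick_lock_object: thin-lock fast path. In outline (a summary of
     * the code below, not an independent spec):
     *   1. If the lock word's state bits are non-zero (fat lock etc.), take
     *      the slow path.
     *   2. If the lock word, ignoring the gc/read-barrier bits, is zero, the
     *      object is unlocked: CAS in our thread id (keeping the gc bits).
     *   3. If it already holds our thread id, CAS in an incremented recursion
     *      count, bailing to the slow path on count overflow.
     * The CAS retries on failure because concurrent changes to the
     * read-barrier state bits can legitimately rewrite the lock word.
     */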
DEFINE_FUNCTION art_quick_lock_object
    testl %edi, %edi                      // Null check object/rdi.
    jz   .Lslow_lock
.Lretry_lock:
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word.
    test LITERAL(LOCK_WORD_STATE_MASK), %ecx         // Test the 2 high bits.
    jne  .Lslow_lock                      // Slow path if either of the two high bits is set.
    movl %ecx, %edx                       // save lock word (edx) to keep read barrier bits.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
    test %ecx, %ecx
    jnz  .Lalready_thin                   // Lock word contains a thin lock.
    // unlocked case - edx: original lock word, edi: obj.
    movl %edx, %eax                       // eax: lock word zero except for read barrier bits.
    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
    or   %eax, %edx                       // edx: thread id with count of 0 + read barrier bits.
    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
    jnz  .Lretry_lock                     // cmpxchg failed, retry.
    ret
.Lalready_thin:  // edx: lock word (with high 2 bits zero and original rb bits), edi: obj.
    movl %gs:THREAD_ID_OFFSET, %ecx       // ecx := thread id
    cmpw %cx, %dx                         // do we hold the lock already?
    jne  .Lslow_lock
    movl %edx, %ecx                       // copy the lock word to check count overflow.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // increment recursion count
    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if the upper bit (28) is set
    jne  .Lslow_lock                      // count overflowed, so take the slow path.
    movl %edx, %eax                       // copy the lock word as the old val for cmpxchg.
    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx   // increment recursion count again for real.
    // update lockword, cmpxchg necessary for read barrier bits.
    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, edx: new val.
    jnz  .Lretry_lock                     // cmpxchg failed, retry.
    ret
.Lslow_lock:
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object

DEFINE_FUNCTION art_quick_lock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object_no_inline

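    /*
     * art_quick_unlock_object: thin-lock unlock fast path, mirroring the lock
     * fast path above: verify the thin-lock owner is this thread, then either
     * decrement the recursion count or store/CAS a cleared lock word
     * (preserving the gc bits). A plain store suffices without read barriers;
     * with them, the CAS guards against concurrent state-bit changes.
     */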
DEFINE_FUNCTION art_quick_unlock_object
    testl %edi, %edi                      // null check object/edi
    jz   .Lslow_unlock
.Lretry_unlock:
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word
    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
    test LITERAL(LOCK_WORD_STATE_MASK), %ecx
    jnz  .Lslow_unlock                    // lock word contains a monitor
    cmpw %cx, %dx                         // does the thread id match?
    jne  .Lslow_unlock
    movl %ecx, %edx                       // copy the lock word to detect new count of 0.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx  // zero the gc bits.
    cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
    jae  .Lrecursive_thin_unlock
    // update lockword, cmpxchg necessary for read barrier bits.
    movl %ecx, %eax                       // eax: old lock word.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx  // ecx: new lock word zero except original gc bits.
#ifndef USE_READ_BARRIER
    movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
#else
    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
    jnz  .Lretry_unlock                   // cmpxchg failed, retry.
#endif
    ret
.Lrecursive_thin_unlock:  // ecx: original lock word, edi: obj
    // update lockword, cmpxchg necessary for read barrier bits.
    movl %ecx, %eax                       // eax: old lock word.
    subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx
#ifndef USE_READ_BARRIER
    mov  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
#else
    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
    jnz  .Lretry_unlock                   // cmpxchg failed, retry.
#endif
    ret
.Lslow_unlock:
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_unlock_object

DEFINE_FUNCTION art_quick_unlock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_unlock_object_no_inline


DEFINE_FUNCTION art_quick_check_instance_of
    // We could check the superclasses here, but that is usually already checked in the caller.
    PUSH rdi                          // Save args for the exception path.
    PUSH rsi
    subq LITERAL(8), %rsp             // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    SETUP_FP_CALLEE_SAVE_FRAME
    call SYMBOL(artInstanceOfFromCode)  // (Object* obj, Class* ref_klass)
    testq %rax, %rax
    jz 1f                             // jump forward if not assignable
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(24), %rsp            // pop arguments
    CFI_ADJUST_CFA_OFFSET(-24)

.Lreturn:
    ret

    CFI_ADJUST_CFA_OFFSET(24 + 4 * 8)  // Reset unwind info so following code unwinds.
1:
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(8), %rsp             // pop padding
    CFI_ADJUST_CFA_OFFSET(-8)
    POP rsi                           // Pop arguments
    POP rdi
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call SYMBOL(artThrowClassCastExceptionForObject)  // (Object* src, Class* dest, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_check_instance_of


// Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
MACRO2(POP_REG_NE, reg, exclude_reg)
    .ifc RAW_VAR(reg), RAW_VAR(exclude_reg)
      addq MACRO_LITERAL(8), %rsp
      CFI_ADJUST_CFA_OFFSET(-8)
    .else
      POP RAW_VAR(reg)
    .endif
END_MACRO

    /*
     * Macro to insert a read barrier, used in art_quick_aput_obj.
     * obj_reg and dest_reg{32|64} are registers, offset is a defined literal such as
     * MIRROR_OBJECT_CLASS_OFFSET. dest_reg needs two versions to handle the mismatch between
     * 64b PUSH/POP and the 32b argument.
     * TODO: When the read barrier has a fast path, add heap unpoisoning support for the fast path.
     *
     * As in the art_quick_aput_obj function, the 64b versions are in comments.
     */
MACRO4(READ_BARRIER, obj_reg, offset, dest_reg32, dest_reg64)
#ifdef USE_READ_BARRIER
    PUSH rax                            // save registers that might be used
    PUSH rdi
    PUSH rsi
    PUSH rdx
    PUSH rcx
    SETUP_FP_CALLEE_SAVE_FRAME
    // Outgoing argument set up
    // movl REG_VAR(ref_reg32), %edi    // pass ref, no-op for now since parameter ref is unused
    // // movq REG_VAR(ref_reg64), %rdi
    movl REG_VAR(obj_reg), %esi         // pass obj_reg
    // movq REG_VAR(obj_reg), %rsi
    movl MACRO_LITERAL((RAW_VAR(offset))), %edx // pass offset, double parentheses are necessary
    // movq MACRO_LITERAL((RAW_VAR(offset))), %rdx
    call SYMBOL(artReadBarrierSlow)     // artReadBarrierSlow(ref, obj_reg, offset)
    // No need to unpoison the return value in rax; artReadBarrierSlow() does the unpoisoning.
    .ifnc RAW_VAR(dest_reg32), eax
    // .ifnc RAW_VAR(dest_reg64), rax
      movl %eax, REG_VAR(dest_reg32)    // save loaded ref in dest_reg
      // movq %rax, REG_VAR(dest_reg64)
    .endif
    RESTORE_FP_CALLEE_SAVE_FRAME
    POP_REG_NE rcx, RAW_VAR(dest_reg64) // Restore registers except dest_reg
    POP_REG_NE rdx, RAW_VAR(dest_reg64)
    POP_REG_NE rsi, RAW_VAR(dest_reg64)
    POP_REG_NE rdi, RAW_VAR(dest_reg64)
    POP_REG_NE rax, RAW_VAR(dest_reg64)
#else
    movl RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg32)
    // movq RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg64)
    UNPOISON_HEAP_REF RAW_VAR(dest_reg32) // UNPOISON_HEAP_REF only takes a 32b register
#endif  // USE_READ_BARRIER
END_MACRO

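    /*
     * art_quick_aput_obj performs the aastore type check and the store:
     *   - null stores skip the type check entirely;
     *   - otherwise, load the array's component type and the value's class
     *     (through READ_BARRIER when read barriers are in use) and compare;
     *   - on mismatch, fall back to artIsAssignableFromCode, and throw
     *     ArrayStoreException if that also fails;
     *   - after a successful store, mark the card for the array object
     *     (address >> 7 indexes the thread-local card table).
     */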
DEFINE_FUNCTION art_quick_aput_obj
    testl %edx, %edx                // store of null
//  test %rdx, %rdx
    jz .Ldo_aput_null
    READ_BARRIER edi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
    // READ_BARRIER rdi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
    READ_BARRIER ecx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
    // READ_BARRIER rcx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
    READ_BARRIER edx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax  // rax is free.
    // READ_BARRIER rdx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax
    cmpl %eax, %ecx  // value's type == array's component type - trivial assignability
#else
    cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ecx // value's type == array's component type - trivial assignability
//  cmpq MIRROR_OBJECT_CLASS_OFFSET(%rdx), %rcx
#endif
    jne .Lcheck_assignability
.Ldo_aput:
    POISON_HEAP_REF edx
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
    shrl LITERAL(7), %edi
//  shrq LITERAL(7), %rdi
    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
    ret
.Ldo_aput_null:
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    ret
.Lcheck_assignability:
    // Save arguments.
    PUSH rdi
    PUSH rsi
    PUSH rdx
    SETUP_FP_CALLEE_SAVE_FRAME

#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
    // The load of MIRROR_OBJECT_CLASS_OFFSET(%edx) is redundant, eax still holds the value.
    movl %eax, %esi               // Pass arg2 = value's class.
    // movq %rax, %rsi
#else
                                     // "Uncompress" = do nothing, as already zero-extended on load.
    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %esi  // Pass arg2 = value's class.
#endif
    movq %rcx, %rdi               // Pass arg1 = array's component type.

    call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)

    // Exception?
    testq %rax, %rax
    jz   .Lthrow_array_store_exception

    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    POP  rdx
    POP  rsi
    POP  rdi

    POISON_HEAP_REF edx
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
    shrl LITERAL(7), %edi
//  shrq LITERAL(7), %rdi
    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
    ret
    CFI_ADJUST_CFA_OFFSET(24 + 4 * 8)  // Reset unwind info so following code unwinds.
.Lthrow_array_store_exception:
    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    POP  rdx
    POP  rsi
    POP  rdi

    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // Save all registers as basis for long jump context.

    // Outgoing argument set up.
    movq %rdx, %rsi                         // Pass arg 2 = value.
    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass arg 3 = Thread::Current().
                                            // Pass arg 1 = array.
    call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_aput_obj

// TODO: This is quite silly on X86_64 now.
DEFINE_FUNCTION art_quick_memcpy
    call PLT_SYMBOL(memcpy)       // (void*, const void*, size_t)
    ret
END_FUNCTION art_quick_memcpy

DEFINE_FUNCTION art_quick_test_suspend
    SETUP_SAVE_EVERYTHING_FRAME                 // save everything for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi           // pass Thread::Current()
    call SYMBOL(artTestSuspendFromCode)         // (Thread*)
    RESTORE_SAVE_EVERYTHING_FRAME               // restore frame up to return address
    ret
END_FUNCTION art_quick_test_suspend

UNIMPLEMENTED art_quick_ldiv
UNIMPLEMENTED art_quick_lmod
UNIMPLEMENTED art_quick_lmul
UNIMPLEMENTED art_quick_lshl
UNIMPLEMENTED art_quick_lshr
UNIMPLEMENTED art_quick_lushr

THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_EAX_ZERO

TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION

TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set64_static, artSet64StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_EAX_ZERO

ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION

DEFINE_FUNCTION art_quick_proxy_invoke_handler
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI

    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass Thread::Current().
    movq %rsp, %rcx                         // Pass SP.
    call SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                        // Copy return value in case of float returns.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_proxy_invoke_handler

    /*
     * Called to resolve an imt conflict.
     * rdi is the conflict ArtMethod.
     * rax is a hidden argument that holds the target interface method's dex method index.
     *
     * Note that this stub writes to r10 and rdi.
     */
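    /*
     * The lookup below relies on the ImtConflictTable layout assumed by this
     * stub: a flat array of (interface ArtMethod*, implementation ArtMethod*)
     * pairs, terminated by a null interface-method entry; hence the compare
     * at offset 0, the load at offset __SIZEOF_POINTER__, and the stride of
     * 2 * __SIZEOF_POINTER__ per iteration.
     */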
DEFINE_FUNCTION art_quick_imt_conflict_trampoline
#if defined(__APPLE__)
    int3
    int3
#else
    movq __SIZEOF_POINTER__(%rsp), %r10 // Load referrer
    movq ART_METHOD_DEX_CACHE_METHODS_OFFSET_64(%r10), %r10   // Load dex cache methods array
    movq 0(%r10, %rax, __SIZEOF_POINTER__), %r10 // Load interface method
    movq ART_METHOD_JNI_OFFSET_64(%rdi), %rdi  // Load ImtConflictTable
.Limt_table_iterate:
    cmpq %r10, 0(%rdi)
    jne .Limt_table_next_entry
    // We successfully hit an entry in the table. Load the target method
    // and jump to it.
    movq __SIZEOF_POINTER__(%rdi), %rdi
    jmp *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)
.Limt_table_next_entry:
    // If the entry is null, the interface method is not in the ImtConflictTable.
    cmpq LITERAL(0), 0(%rdi)
    jz .Lconflict_trampoline
    // Iterate over the entries of the ImtConflictTable.
    addq LITERAL(2 * __SIZEOF_POINTER__), %rdi
    jmp .Limt_table_iterate
.Lconflict_trampoline:
    // Call the runtime stub to populate the ImtConflictTable and jump to the
    // resolved method.
    movq %r10, %rdi  // Pass the interface method.
    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
#endif  // __APPLE__
END_FUNCTION art_quick_imt_conflict_trampoline

DEFINE_FUNCTION art_quick_resolution_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rdx
    movq %rsp, %rcx
    call SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
    movq %rax, %r10               // Remember returned code pointer in R10.
    movq (%rsp), %rdi             // Load called method into RDI.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    testq %r10, %r10              // If the code pointer is null, deliver the pending exception.
    jz 1f
    jmp *%r10                     // Tail call into method.
1:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_resolution_trampoline

/* Generic JNI frame layout:
 *
 * #-------------------#
 * |                   |
 * | caller method...  |
 * #-------------------#    <--- SP on entry
 *
 *          |
 *          V
 *
 * #-------------------#
 * | caller method...  |
 * #-------------------#
 * | Return            |
 * | R15               |    callee save
 * | R14               |    callee save
 * | R13               |    callee save
 * | R12               |    callee save
 * | R9                |    arg5
 * | R8                |    arg4
 * | RSI/R6            |    arg1
 * | RBP/R5            |    callee save
 * | RBX/R3            |    callee save
 * | RDX/R2            |    arg2
 * | RCX/R1            |    arg3
 * | XMM7              |    float arg 8
 * | XMM6              |    float arg 7
 * | XMM5              |    float arg 6
 * | XMM4              |    float arg 5
 * | XMM3              |    float arg 4
 * | XMM2              |    float arg 3
 * | XMM1              |    float arg 2
 * | XMM0              |    float arg 1
 * | RDI/Method*       |  <- sp
 * #-------------------#
 * | Scratch Alloca    |    5K scratch space
 * #---------#---------#
 * |         | sp*     |
 * | Tramp.  #---------#
 * | args    | thread  |
 * | Tramp.  #---------#
 * |         | method  |
 * #-------------------#    <--- SP on artQuickGenericJniTrampoline
 *
 *           |
 *           v              artQuickGenericJniTrampoline
 *
 * #-------------------#
 * | caller method...  |
 * #-------------------#
 * | Return            |
 * | Callee-Save Data  |
 * #-------------------#
 * | handle scope      |
 * #-------------------#
 * | Method*           |    <--- (1)
 * #-------------------#
 * | local ref cookie  | // 4B
 * | handle scope size | // 4B   TODO: roll into call stack alignment?
 * #-------------------#
 * | JNI Call Stack    |
 * #-------------------#    <--- SP on native call
 * |                   |
 * | Stack for Regs    |    The trampoline assembly will pop these values
 * |                   |    into registers for native call
 * #-------------------#
 * | Native code ptr   |
 * #-------------------#
 * | Free scratch      |
 * #-------------------#
 * | Ptr to (1)        |    <--- RSP
 * #-------------------#
 */
    /*
     * Called to do a generic JNI down-call
     */
DEFINE_FUNCTION art_quick_generic_jni_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI

    movq %rsp, %rbp                 // save SP at (old) callee-save frame
    CFI_DEF_CFA_REGISTER(rbp)

    //
    // reserve a lot of space
    //
    //      4    local state ref
    //      4    padding
    //   4096    4k scratch space, enough for 2x 256 8-byte parameters (TODO: handle scope overhead?)
    //     16    handle scope member fields ?
    // +  112    14x 8-byte stack-2-register space
    // ------
    //   4232
    // 16-byte aligned: 4240
    // Note: 14x8 = 7*16, so the stack stays aligned for the native call...
    //       Also means: the padding is somewhere in the middle
    //
    //
    // New test: use 5K and release
    // 5k = 5120
    subq LITERAL(5120), %rsp
    // prepare for artQuickGenericJniTrampoline call
    // (Thread*,  SP)
    //    rdi    rsi      <= C calling convention
    //  gs:...   rbp      <= where they are
    movq %gs:THREAD_SELF_OFFSET, %rdi
    movq %rbp, %rsi
    call SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)

    // The C call will have registered the complete save-frame on success.
    // The result of the call is:
    // %rax: pointer to native code, 0 on error.
    // %rdx: pointer to the bottom of the used area of the alloca, can restore stack till there.

    // Check for error = 0.
    test %rax, %rax
    jz .Lexception_in_native

    // Release part of the alloca.
    movq %rdx, %rsp

    // Pop the GPR arguments from the register-passing alloca region, in the
    // layout prepared by artQuickGenericJniTrampoline.
    popq %rdi
    popq %rsi
    popq %rdx
    popq %rcx
    popq %r8
    popq %r9
    // TODO: skip floating point if unused, some flag.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    addq LITERAL(64), %rsp          // floating-point done

    // native call
    call *%rax

    // result sign extension is handled in C code
    // prepare for artQuickGenericJniEndTrampoline call
    // (Thread*,  result, result_f)
    //   rdi      rsi   rdx       <= C calling convention
    //  gs:...    rax   xmm0      <= where they are
    movq %gs:THREAD_SELF_OFFSET, %rdi
    movq %rax, %rsi
    movq %xmm0, %rdx
    call SYMBOL(artQuickGenericJniEndTrampoline)

    // Pending exceptions possible.
    // TODO: use cmpq, needs direct encoding because of gas bug
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx
    test %rcx, %rcx
    jnz .Lexception_in_native

    // Tear down the alloca.
    movq %rbp, %rsp
    CFI_DEF_CFA_REGISTER(rsp)

    // Tear down the callee-save frame.
    // Load FPRs.
    // Skip the xmm0 slot: xmm0 holds the floating-point return value of the
    // call above and must not be clobbered here.
    // movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    // Pop the method/padding slots and the XMM argument slots (80 bytes),
    // plus the four callee-save XMM slots (4*8).
    addq LITERAL(80 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
    // Restore callee saves and GPR args, mixed together to agree with core spills bitmap.
    POP rcx  // Arg.
    POP rdx  // Arg.
    POP rbx  // Callee save.
    POP rbp  // Callee save.
    POP rsi  // Arg.
    POP r8   // Arg.
    POP r9   // Arg.
    POP r12  // Callee save.
    POP r13  // Callee save.
    POP r14  // Callee save.
    POP r15  // Callee save.
    // Also store the result into xmm0, for the case of an fpr return.
    movq %rax, %xmm0
    ret
.Lexception_in_native:
    movq %gs:THREAD_TOP_QUICK_FRAME_OFFSET, %rsp
    CFI_DEF_CFA_REGISTER(rsp)
    // Do a call to push a new save-all frame required by the runtime.
    call .Lexception_call
.Lexception_call:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_generic_jni_trampoline

    /*
     * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
     * of a quick call:
     * RDI = method being called / to bridge to.
     * RSI, RDX, RCX, R8, R9 are arguments to that method.
     */
DEFINE_FUNCTION art_quick_to_interpreter_bridge
    SETUP_SAVE_REFS_AND_ARGS_FRAME     // Set up frame and save arguments.
    movq %gs:THREAD_SELF_OFFSET, %rsi  // RSI := Thread::Current()
    movq %rsp, %rdx                    // RDX := sp
    call SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME   // TODO: no need to restore arguments in this case.
    movq %rax, %xmm0                   // Place return value also into floating point return value.
    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
END_FUNCTION art_quick_to_interpreter_bridge

    /*
     * Called to catch an attempt to invoke an obsolete method.
     * RDI = method being called.
     */
ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod

    /*
     * Routine that intercepts method calls and returns.
     */
DEFINE_FUNCTION art_quick_instrumentation_entry
#if defined(__APPLE__)
    int3
    int3
#else
    SETUP_SAVE_REFS_AND_ARGS_FRAME

    movq %rdi, %r12               // Preserve method pointer in a callee-save.

    movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
    movq FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp), %rcx   // Pass return PC.

    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, LR)

                                  // %rax = result of call.
    movq %r12, %rdi               // Reload method pointer.

    leaq art_quick_instrumentation_exit(%rip), %r12   // Set up return through instrumentation
    movq %r12, FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp)  // exit.

    RESTORE_SAVE_REFS_AND_ARGS_FRAME

    jmp *%rax                     // Tail call to intended method.
#endif  // __APPLE__
END_FUNCTION art_quick_instrumentation_entry

DEFINE_FUNCTION art_quick_instrumentation_exit
    pushq LITERAL(0)          // Push a fake return PC as there will be none on the stack.

    SETUP_SAVE_REFS_ONLY_FRAME

    // We need to save rax and xmm0. We could use a callee-save from SETUP_SAVE_REFS_ONLY_FRAME,
    // but then we would need to fully restore it. As there are a good number of callee-save
    // registers, it seems easier to have an extra small stack area. But this should be revisited.

    movq  %rsp, %rsi                          // Pass SP.

    PUSH rax                  // Save integer result.
    subq LITERAL(8), %rsp     // Save floating-point result.
    CFI_ADJUST_CFA_OFFSET(8)
    movq %xmm0, (%rsp)

    movq  %gs:THREAD_SELF_OFFSET, %rdi        // Pass Thread.
    movq  %rax, %rdx                          // Pass integer result.
    movq  %xmm0, %rcx                         // Pass floating-point result.

    call SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res, fpr_res)

    movq  %rax, %rdi          // Store return PC
    movq  %rdx, %rsi          // Store second return PC in hidden arg.

    movq (%rsp), %xmm0        // Restore floating-point result.
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    POP rax                   // Restore integer result.

    RESTORE_SAVE_REFS_ONLY_FRAME

    addq LITERAL(8), %rsp     // Drop fake return pc.

    jmp   *%rdi               // Return.
END_FUNCTION art_quick_instrumentation_exit

    /*
     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the upcall with a special exception of -1.
     */
DEFINE_FUNCTION art_quick_deoptimize
    pushq %rsi                         // Entry point for a jump. Fake that we were called.
                                       // Use hidden arg.
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
                                       // Stack should be aligned now.
    movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread.
    call SYMBOL(artDeoptimize)         // (Thread*)
    UNREACHABLE
END_FUNCTION art_quick_deoptimize

    /*
     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the interpreter bridge.
     */
DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
    SETUP_SAVE_EVERYTHING_FRAME
                                                // Stack should be aligned now.
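                                                // The DeoptimizationKind argument (arg 1) is expected
                                                // in RDI, set up by the compiled-code caller.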
    movq %gs:THREAD_SELF_OFFSET, %rsi           // Pass Thread.
    call SYMBOL(artDeoptimizeFromCompiledCode)  // (DeoptimizationKind, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_deoptimize_from_compiled_code

    /*
     * String's compareTo.
     *
     * On entry:
     *    rdi:   this string object (known non-null)
     *    rsi:   comp string object (known non-null)
     */
DEFINE_FUNCTION art_quick_string_compareto
    movl MIRROR_STRING_COUNT_OFFSET(%edi), %r8d
    movl MIRROR_STRING_COUNT_OFFSET(%esi), %r9d
    /* Build pointers to the start of string data */
    leal MIRROR_STRING_VALUE_OFFSET(%edi), %edi
    leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
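    /*
     * With string compression, bit 0 of the count word is the compression
     * flag and the remaining bits are the length: each shrl below moves the
     * flag into CF, so jnc (flag == 0) selects the compressed (8-bit) case
     * and leaves the plain length in the register.
     */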
#if (STRING_COMPRESSION_FEATURE)
    /* Dispatch on the compression of each string */
    shrl    LITERAL(1), %r8d
    jnc     .Lstring_compareto_this_is_compressed
    shrl    LITERAL(1), %r9d
    jnc     .Lstring_compareto_that_is_compressed
    jmp     .Lstring_compareto_both_not_compressed
.Lstring_compareto_this_is_compressed:
    shrl    LITERAL(1), %r9d
    jnc     .Lstring_compareto_both_compressed
    /* Comparison of this (8-bit) and that (16-bit) */
    mov     %r8d, %eax
    subl    %r9d, %eax
    mov     %r8d, %ecx
    cmovg   %r9d, %ecx
    /* Going into loop to compare each character */
    jecxz   .Lstring_compareto_keep_length1     // check loop counter (if 0 then stop)
.Lstring_compareto_loop_comparison_this_compressed:
    movzbl  (%edi), %r8d                        // move *(this_cur_char) byte to long
    movzwl  (%esi), %r9d                        // move *(that_cur_char) word to long
    addl    LITERAL(1), %edi                    // ++this_cur_char (8-bit)
    addl    LITERAL(2), %esi                    // ++that_cur_char (16-bit)
    subl    %r9d, %r8d
    loope   .Lstring_compareto_loop_comparison_this_compressed
    cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
.Lstring_compareto_keep_length1:
    ret
.Lstring_compareto_that_is_compressed:
    movl    %r8d, %eax
    subl    %r9d, %eax
    mov     %r8d, %ecx
    cmovg   %r9d, %ecx
    /* Comparison of this (16-bit) and that (8-bit) */
    jecxz   .Lstring_compareto_keep_length2     // check loop counter (if 0, don't compare)
.Lstring_compareto_loop_comparison_that_compressed:
    movzwl  (%edi), %r8d                        // move *(this_cur_char) word to long
    movzbl  (%esi), %r9d                        // move *(that_cur_char) byte to long
    addl    LITERAL(2), %edi                    // ++this_cur_char (16-bit)
    addl    LITERAL(1), %esi                    // ++that_cur_char (8-bit)
    subl    %r9d, %r8d
    loope   .Lstring_compareto_loop_comparison_that_compressed
    cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
.Lstring_compareto_keep_length2:
    ret
.Lstring_compareto_both_compressed:
    /* Calculate min length and count diff */
    movl    %r8d, %ecx
    movl    %r8d, %eax
    subl    %r9d, %eax
    cmovg   %r9d, %ecx
    jecxz   .Lstring_compareto_keep_length3
    repe    cmpsb
    je      .Lstring_compareto_keep_length3
    movzbl  -1(%edi), %eax        // get last compared char from this string (8-bit)
    movzbl  -1(%esi), %ecx        // get last compared char from comp string (8-bit)
    jmp     .Lstring_compareto_count_difference
#endif // STRING_COMPRESSION_FEATURE
.Lstring_compareto_both_not_compressed:
    /* Calculate min length and count diff */
    movl    %r8d, %ecx
    movl    %r8d, %eax
    subl    %r9d, %eax
    cmovg   %r9d, %ecx
    /*
     * At this point we have:
     *   eax: value to return if first part of strings are equal
     *   ecx: minimum among the lengths of the two strings
     *   esi: pointer to comp string data
     *   edi: pointer to this string data
     */
    jecxz .Lstring_compareto_keep_length3
    repe  cmpsw                   // find nonmatching chars in [%esi] and [%edi], up to length %ecx
    je    .Lstring_compareto_keep_length3
    movzwl  -2(%edi), %eax        // get last compared char from this string (16-bit)
    movzwl  -2(%esi), %ecx        // get last compared char from comp string (16-bit)
.Lstring_compareto_count_difference:
    subl  %ecx, %eax              // return the difference
.Lstring_compareto_keep_length3:
    ret
END_FUNCTION art_quick_string_compareto

UNIMPLEMENTED art_quick_memcmp16

DEFINE_FUNCTION art_quick_instance_of
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                      // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artInstanceOfFromCode)         // (mirror::Object*, mirror::Class*)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_instance_of

// Create a function `name` calling the ReadBarrier::Mark routine,
// getting its argument and returning its result through register
// `reg`, saving and restoring all caller-save registers.
//
// The generated function follows a non-standard runtime calling
// convention:
// - register `reg` (which may be different from RDI) is used to pass
//   the (sole) argument of this function;
// - register `reg` (which may be different from RAX) is used to return
//   the result of this function (instead of RAX);
// - if `reg` is different from `rdi`, RDI is treated like a normal
//   (non-argument) caller-save register;
// - if `reg` is different from `rax`, RAX is treated like a normal
//   (non-result) caller-save register;
// - everything else is the same as in the standard runtime calling
//   convention (e.g. standard callee-save registers are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
    DEFINE_FUNCTION VAR(name)
    // Null check so that we can load the lock word.
    testq REG_VAR(reg), REG_VAR(reg)
    jz .Lret_rb_\name
.Lnot_null_\name:
    // Check the mark bit: if it is 1, return.
    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
    jz .Lslow_rb_\name
    ret
.Lslow_rb_\name:
    PUSH rax
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
    addl LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
    // Jump if the addl caused an unsigned overflow of eax. The only state that
    // overflows is the forwarding-address one.
    // Taken ~25% of the time.
    jnae .Lret_forwarding_address\name

    // Save all potentially live caller-save core registers.
    movq 0(%rsp), %rax              // Reload eax; the addl above clobbered it.
    PUSH rcx
    PUSH rdx
    PUSH rsi
    PUSH rdi
    PUSH r8
    PUSH r9
    PUSH r10
    PUSH r11
    // Create space for caller-save floating-point registers.
    subq MACRO_LITERAL(12 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(12 * 8)
    // Save all potentially live caller-save floating-point registers.
    movq %xmm0, 0(%rsp)
    movq %xmm1, 8(%rsp)
    movq %xmm2, 16(%rsp)
    movq %xmm3, 24(%rsp)
    movq %xmm4, 32(%rsp)
    movq %xmm5, 40(%rsp)
    movq %xmm6, 48(%rsp)
    movq %xmm7, 56(%rsp)
    movq %xmm8, 64(%rsp)
    movq %xmm9, 72(%rsp)
    movq %xmm10, 80(%rsp)
    movq %xmm11, 88(%rsp)
    SETUP_FP_CALLEE_SAVE_FRAME

    .ifnc RAW_VAR(reg), rdi
      movq REG_VAR(reg), %rdi       // Pass arg1 - obj from `reg`.
    .endif
    call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
    .ifnc RAW_VAR(reg), rax
      movq %rax, REG_VAR(reg)       // Return result into `reg`.
    .endif

    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore floating-point registers.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    movq 64(%rsp), %xmm8
    movq 72(%rsp), %xmm9
    movq 80(%rsp), %xmm10
    movq 88(%rsp), %xmm11
    // Remove floating-point registers.
    addq MACRO_LITERAL(12 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(12 * 8))
    // Restore core regs, except `reg`, as it is used to return the
    // result of this function (simply remove it from the stack instead).
    POP_REG_NE r11, RAW_VAR(reg)
    POP_REG_NE r10, RAW_VAR(reg)
    POP_REG_NE r9, RAW_VAR(reg)
    POP_REG_NE r8, RAW_VAR(reg)
    POP_REG_NE rdi, RAW_VAR(reg)
    POP_REG_NE rsi, RAW_VAR(reg)
    POP_REG_NE rdx, RAW_VAR(reg)
    POP_REG_NE rcx, RAW_VAR(reg)
    POP_REG_NE rax, RAW_VAR(reg)
.Lret_rb_\name:
    ret
.Lret_forwarding_address\name:
    // The overflow also cleared the state bits; shift left to recover the
    // forwarding address.
    sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
    movq %rax, REG_VAR(reg)
    POP_REG_NE rax, RAW_VAR(reg)
    ret
    END_FUNCTION VAR(name)
END_MACRO
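
// The regNN suffix in each instantiation below matches the x86-64 encoding
// number of the register holding the reference, so compiled code can select
// the stub for that register directly. Register 4 (RSP) is skipped, as noted
// below.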

READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx
// Note: There is no art_quick_read_barrier_mark_reg04, as register 4 (RSP)
// cannot be used to pass arguments.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, r12
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, r13
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, r14
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, r15

DEFINE_FUNCTION art_quick_read_barrier_slow
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp           // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj, offset)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_slow

DEFINE_FUNCTION art_quick_read_barrier_for_root_slow
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                  // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierForRootSlow) // artReadBarrierForRootSlow(root)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_for_root_slow

    /*
     * On stack replacement stub.
     * On entry:
     *   [sp] = return address
     *   rdi = stack to copy
     *   rsi = size of stack
     *   rdx = pc to call
     *   rcx = JValue* result
     *   r8 = shorty
     *   r9 = thread
     *
     * Note that the native C ABI already aligned the stack to 16 bytes.
     */
DEFINE_FUNCTION art_quick_osr_stub
    // Save the non-volatiles.
    PUSH rbp                      // Save rbp.
    PUSH rcx                      // Save rcx/result*.
    PUSH r8                       // Save r8/shorty*.

    // Save callee saves.
    PUSH rbx
    PUSH r12
    PUSH r13
    PUSH r14
    PUSH r15

    pushq LITERAL(0)              // Push null for ArtMethod*.
    movl %esi, %ecx               // rcx := size of stack
    movq %rdi, %rsi               // rsi := stack to copy
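    // The call below pushes a return address, so when the OSR-compiled code
    // entered via `jmp *%rdx` eventually returns, control lands back in this
    // stub, which then stores the result through rcx/result* according to
    // the shorty's return-type character.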
    call .Losr_entry

    // Restore stack and callee-saves.
    addq LITERAL(8), %rsp
    POP r15
    POP r14
    POP r13
    POP r12
    POP rbx
    POP r8
    POP rcx
    POP rbp
    cmpb LITERAL(68), (%r8)        // Test if result type char == 'D'.
    je .Losr_return_double_quick
    cmpb LITERAL(70), (%r8)        // Test if result type char == 'F'.
    je .Losr_return_float_quick
    movq %rax, (%rcx)              // Store the result, assuming it's a long, int or Object*.
    ret
.Losr_return_double_quick:
    movsd %xmm0, (%rcx)            // Store the double floating point result.
    ret
.Losr_return_float_quick:
    movss %xmm0, (%rcx)            // Store the floating point result.
    ret
.Losr_entry:
    subl LITERAL(8), %ecx         // The given stack size includes the pushed frame pointer; subtract it.
    subq %rcx, %rsp
    movq %rsp, %rdi               // rdi := beginning of stack
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    jmp *%rdx
END_FUNCTION art_quick_osr_stub

DEFINE_FUNCTION art_quick_invoke_polymorphic
    SETUP_SAVE_REFS_AND_ARGS_FRAME                 // save callee saves
    movq %gs:THREAD_SELF_OFFSET, %rdx              // pass Thread
    movq %rsp, %rcx                                // pass SP
    subq LITERAL(16), %rsp                         // make space for JValue result
    CFI_ADJUST_CFA_OFFSET(16)
    movq LITERAL(0), (%rsp)                        // initialize result
    movq %rsp, %rdi                                // store pointer to JValue result
    call SYMBOL(artInvokePolymorphic)              // artInvokePolymorphic(result, receiver, Thread*, SP)
                                                   // rax := type descriptor character of the return value
    subq LITERAL('A'), %rax                        // Convert type descriptor character value to a zero-based index.
    cmpb LITERAL('Z' - 'A'), %al                   // Eliminate out of bounds options
    ja .Lcleanup_and_return
    movzbq %al, %rax
    leaq .Lhandler_table(%rip), %rcx               // Get the address of the handler table
    movslq (%rcx, %rax, 4), %rax                   // Lookup handler offset relative to table
    addq %rcx, %rax                                // Add table address to yield handler address.
    jmpq *%rax                                     // Jump to handler.

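// The table below enables position-independent dispatch: it stores 32-bit
// offsets of each handler relative to .Lhandler_table rather than absolute
// addresses, so no relocations are needed at runtime; the movslq/addq above
// turns an offset back into a handler address.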
.align 4
.Lhandler_table:                                   // Table of type descriptor to handlers.
MACRO1(HANDLER_TABLE_OFFSET, handle_label)
    // NB some tools require 32-bits for relocations. Shouldn't need adjusting.
    .long RAW_VAR(handle_label) - .Lhandler_table
END_MACRO
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // A
    HANDLER_TABLE_OFFSET(.Lstore_long_result)      // B (byte)
    HANDLER_TABLE_OFFSET(.Lstore_char_result)      // C (char)
    HANDLER_TABLE_OFFSET(.Lstore_double_result)    // D (double)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // E
    HANDLER_TABLE_OFFSET(.Lstore_float_result)     // F (float)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // G
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // H
    HANDLER_TABLE_OFFSET(.Lstore_long_result)      // I (int)
    HANDLER_TABLE_OFFSET(.Lstore_long_result)      // J (long)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // K
    HANDLER_TABLE_OFFSET(.Lstore_long_result)      // L (object - references are compressed and only 32-bits)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // M
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // N
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // O
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // P
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // Q
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // R
    HANDLER_TABLE_OFFSET(.Lstore_long_result)      // S (short)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // T
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // U
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // V (void)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // W
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // X
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // Y
    HANDLER_TABLE_OFFSET(.Lstore_boolean_result)   // Z (boolean)

.Lstore_boolean_result:
    movzbq (%rsp), %rax                            // Copy boolean result to the accumulator
    jmp .Lcleanup_and_return
.Lstore_char_result:
    movzwq (%rsp), %rax                            // Copy char result to the accumulator
    jmp .Lcleanup_and_return
.Lstore_float_result:
    movd (%rsp), %xmm0                             // Copy float result to the context restored by
    movd %xmm0, 32(%rsp)                           // RESTORE_SAVE_REFS_AND_ARGS_FRAME.
    jmp .Lcleanup_and_return
.Lstore_double_result:
    movsd (%rsp), %xmm0                            // Copy double result to the context restored by
    movsd %xmm0, 32(%rsp)                          // RESTORE_SAVE_REFS_AND_ARGS_FRAME.
    jmp .Lcleanup_and_return
.Lstore_long_result:
    movq (%rsp), %rax                              // Copy long result to the accumulator.
    // Fall-through
.Lcleanup_and_return:
    addq LITERAL(16), %rsp                         // Pop space for JValue result.
    CFI_ADJUST_CFA_OFFSET(-16)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_invoke_polymorphic