/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_arm.S"
#include "interpreter/cfi_asm_support.h"

#include "arch/quick_alloc_entrypoints.S"

    /* Deliver the given exception */
    .extern artDeliverExceptionFromCode
    /* Deliver an exception pending on a thread */
    .extern artDeliverPendingException

    /*
     * Macro to spill the GPRs.
     */
.macro SPILL_ALL_CALLEE_SAVE_GPRS
    push {r4-r11, lr}                             @ 9 words (36 bytes) of callee saves.
    .cfi_adjust_cfa_offset 36
    .cfi_rel_offset r4, 0
    .cfi_rel_offset r5, 4
    .cfi_rel_offset r6, 8
    .cfi_rel_offset r7, 12
    .cfi_rel_offset r8, 16
    .cfi_rel_offset r9, 20
    .cfi_rel_offset r10, 24
    .cfi_rel_offset r11, 28
    .cfi_rel_offset lr, 32
.endm
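
    /*
     * Illustrative note (informational only): after SPILL_ALL_CALLEE_SAVE_GPRS
     * the stack holds, from low to high address:
     *
     *   [sp + 0]  r4    [sp + 12] r7    [sp + 24] r10
     *   [sp + 4]  r5    [sp + 16] r8    [sp + 28] r11
     *   [sp + 8]  r6    [sp + 20] r9    [sp + 32] lr
     *
     * which is exactly the layout the .cfi_rel_offset directives above describe.
     */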

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
     */
.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME rTemp
    SPILL_ALL_CALLEE_SAVE_GPRS                    @ 9 words (36 bytes) of callee saves.
    vpush {s16-s31}                               @ 16 words (64 bytes) of floats.
    .cfi_adjust_cfa_offset 64
    sub sp, #12                                   @ 3 words of space, bottom word will hold Method*
    .cfi_adjust_cfa_offset 12
    RUNTIME_CURRENT1 \rTemp                       @ Load Runtime::Current into rTemp.
    @ Load kSaveAllCalleeSaves Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]
    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 36 + 64 + 12)
#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM) size not as expected."
#endif
.endm
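
    /*
     * Illustrative note: the frame built above is 36 (GPRs) + 64 (s16-s31)
     * + 12 (padding + Method*) = 112 bytes, a multiple of 16, so it preserves
     * 16-byte stack alignment without further padding.
     */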

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
     */
.macro SETUP_SAVE_REFS_ONLY_FRAME rTemp
    // Note: We could avoid saving R8 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    push {r5-r8, r10-r11, lr}                     @ 7 words of callee saves
    .cfi_adjust_cfa_offset 28
    .cfi_rel_offset r5, 0
    .cfi_rel_offset r6, 4
    .cfi_rel_offset r7, 8
    .cfi_rel_offset r8, 12
    .cfi_rel_offset r10, 16
    .cfi_rel_offset r11, 20
    .cfi_rel_offset lr, 24
    sub sp, #4                                    @ bottom word will hold Method*
    .cfi_adjust_cfa_offset 4
    RUNTIME_CURRENT2 \rTemp                       @ Load Runtime::Current into rTemp.
    @ Load kSaveRefsOnly Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]
    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_ONLY != 28 + 4)
#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM) size not as expected."
#endif
.endm
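
    /*
     * Illustrative note: this frame is 28 + 4 = 32 bytes, again a multiple of
     * 16, so no extra alignment padding is needed here.
     */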

.macro RESTORE_SAVE_REFS_ONLY_FRAME
    add sp, #4               @ bottom word holds Method*
    .cfi_adjust_cfa_offset -4
    // Note: Likewise, we could avoid restoring R8 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore lr
    .cfi_adjust_cfa_offset -28
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
     */
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    // Note: We could avoid saving R8 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    push {r1-r3, r5-r8, r10-r11, lr}   @ 10 words of callee saves and args.
    .cfi_adjust_cfa_offset 40
    .cfi_rel_offset r1, 0
    .cfi_rel_offset r2, 4
    .cfi_rel_offset r3, 8
    .cfi_rel_offset r5, 12
    .cfi_rel_offset r6, 16
    .cfi_rel_offset r7, 20
    .cfi_rel_offset r8, 24
    .cfi_rel_offset r10, 28
    .cfi_rel_offset r11, 32
    .cfi_rel_offset lr, 36
    vpush {s0-s15}                     @ 16 words of float args.
    .cfi_adjust_cfa_offset 64
    sub sp, #8                         @ 2 words of space, alignment padding and Method*
    .cfi_adjust_cfa_offset 8
    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 40 + 64 + 8)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM) size not as expected."
#endif
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME rTemp
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    RUNTIME_CURRENT3 \rTemp                       @ Load Runtime::Current into rTemp.
    @ Load kSaveRefsAndArgs Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    str r0, [sp, #0]                              @ Store ArtMethod* to bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
.endm

.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
    add  sp, #8                      @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {s0-s15}
    .cfi_adjust_cfa_offset -64
    // Note: Likewise, we could avoid restoring R8 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    pop {r1-r3, r5-r8, r10-r11, lr}  @ 10 words of callee saves and args
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore lr
    .cfi_adjust_cfa_offset -40
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when core registers are already saved.
     */
.macro SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
                                        @ 14 words of callee saves and args already saved.
    vpush {d0-d15}                      @ 32 words, 2 for each of the 16 saved doubles.
    .cfi_adjust_cfa_offset 128
    sub sp, #8                          @ 2 words of space, alignment padding and Method*
    .cfi_adjust_cfa_offset 8
    RUNTIME_CURRENT1 \rTemp             @ Load Runtime::Current into rTemp.
    @ Load kSaveEverything Method* into rTemp.
    ldr \rTemp, [\rTemp, #\runtime_method_offset]
    str \rTemp, [sp, #0]                @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
#endif
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     */
.macro SETUP_SAVE_EVERYTHING_FRAME rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    push {r0-r12, lr}                   @ 14 words of callee saves and args.
    .cfi_adjust_cfa_offset 56
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset r4, 16
    .cfi_rel_offset r5, 20
    .cfi_rel_offset r6, 24
    .cfi_rel_offset r7, 28
    .cfi_rel_offset r8, 32
    .cfi_rel_offset r9, 36
    .cfi_rel_offset r10, 40
    .cfi_rel_offset r11, 44
    .cfi_rel_offset ip, 48
    .cfi_rel_offset lr, 52
    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED \rTemp, \runtime_method_offset
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME
    add  sp, #8                         @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {d0-d15}
    .cfi_adjust_cfa_offset -128
    pop {r0-r12, lr}                    @ 14 words of callee saves
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r4
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r9
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore r12
    .cfi_restore lr
    .cfi_adjust_cfa_offset -56
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
    add  sp, #8                         @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {d0-d15}
    .cfi_adjust_cfa_offset -128
    add  sp, #4                         @ skip r0
    .cfi_adjust_cfa_offset -4
    .cfi_restore r0                     @ debugger can no longer restore caller's r0
    pop {r1-r12, lr}                    @ 13 words of callee saves
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r4
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r9
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore r12
    .cfi_restore lr
    .cfi_adjust_cfa_offset -52
.endm

// Macro to refresh the Marking Register (R8).
//
// This macro must be called at the end of functions implementing
// entrypoints that possibly (directly or indirectly) perform a
// suspend check (before they return).
.macro REFRESH_MARKING_REGISTER
#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
    ldr rMR, [rSELF, #THREAD_IS_GC_MARKING_OFFSET]
#endif
.endm
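
// Illustrative sketch (not part of the build): with Baker read barriers
// enabled, the macro above is roughly the following C++, assuming rSELF (R9)
// holds Thread* and rMR is R8:
//
//   rMR = self->GetIsGcMarking();  // must be reloaded after any suspend point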

.macro RETURN_IF_RESULT_IS_ZERO
    cbnz   r0, 1f              @ result non-zero branch over
    bx     lr                  @ return
1:
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO
    cbz    r0, 1f              @ result zero branch over
    bx     lr                  @ return
1:
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
     */
.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
    mov    r0, r9                              @ pass Thread::Current
    bl     artDeliverPendingExceptionFromCode  @ artDeliverPendingExceptionFromCode(Thread*)
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_.
     */
.macro DELIVER_PENDING_EXCEPTION
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save callee saves for throw
    DELIVER_PENDING_EXCEPTION_FRAME_READY
.endm
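
    /*
     * Illustrative note: artDeliverPendingExceptionFromCode(Thread*) does not
     * return; it unwinds, longjmp-style, into the catch handler (or into the
     * upcall that entered managed code), which is why no instruction follows
     * the bl in the macros above.
     */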

.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save all registers as basis for long jump context
    mov r0, r9                      @ pass Thread::Current
    bl  \cxx_name                   @ \cxx_name(Thread*)
END \c_name
.endm

.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME r0  @ save all registers as basis for long jump context
    mov r0, r9                      @ pass Thread::Current
    bl  \cxx_name                   @ \cxx_name(Thread*)
END \c_name
.endm

.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r1       @ save all registers as basis for long jump context
    mov r1, r9                      @ pass Thread::Current
    bl  \cxx_name                   @ \cxx_name(arg, Thread*)
END \c_name
.endm

.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME r2  @ save all registers as basis for long jump context
    mov r2, r9                      @ pass Thread::Current
    bl  \cxx_name                   @ \cxx_name(arg1, arg2, Thread*)
END \c_name
.endm

.macro  RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
    ldr \reg, [r9, #THREAD_EXCEPTION_OFFSET]   // Get exception field.
    cbnz \reg, 1f
    bx lr
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro  RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r1
.endm

.macro RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
    RETURN_IF_RESULT_IS_NON_ZERO
    DELIVER_PENDING_EXCEPTION
.endm

// Macros that take advantage of code similarities between the downcalls.
.macro  ONE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r1        @ save callee saves in case of GC
    mov    r1, r9                        @ pass Thread::Current
    bl     \entrypoint                   @ (uint32_t field_idx, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

.macro  TWO_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
    mov    r2, r9                        @ pass Thread::Current
    bl     \entrypoint                   @ (field_idx, Object*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r3        @ save callee saves in case of GC
    mov    r3, r9                        @ pass Thread::Current
    bl     \entrypoint                   @ (field_idx, Object*, new_val, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm
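
    /*
     * Illustrative expansion (informational only; it mirrors an instantiation
     * that appears later in this file): TWO_ARG_REF_DOWNCALL with the
     * RETURN_IF_RESULT_IS_ZERO_OR_DELIVER return macro produces roughly:
     *
     *   ENTRY art_quick_handle_fill_data
     *       SETUP_SAVE_REFS_ONLY_FRAME r2    @ r0/r1 already hold the real args
     *       mov    r2, r9                    @ Thread::Current as the third arg
     *       bl     artHandleFillArrayDataFromCode
     *       RESTORE_SAVE_REFS_ONLY_FRAME
     *       REFRESH_MARKING_REGISTER
     *       cbnz   r0, 1f                    @ non-zero result => exception
     *       bx     lr
     *   1:  DELIVER_PENDING_EXCEPTION
     *   END art_quick_handle_fill_data
     */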

    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
    .extern art_quick_throw_null_pointer_exception_from_signal
ENTRY art_quick_throw_null_pointer_exception_from_signal
    // The fault handler pushes the gc map address, i.e. "return address", to stack
    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
    .cfi_def_cfa_offset __SIZEOF_POINTER__
    .cfi_rel_offset lr, 0
    push {r0-r12}                   @ 13 words of callee saves and args; LR already saved.
    .cfi_adjust_cfa_offset 52
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset r4, 16
    .cfi_rel_offset r5, 20
    .cfi_rel_offset r6, 24
    .cfi_rel_offset r7, 28
    .cfi_rel_offset r8, 32
    .cfi_rel_offset r9, 36
    .cfi_rel_offset r10, 40
    .cfi_rel_offset r11, 44
    .cfi_rel_offset ip, 48

    @ save all registers as basis for long jump context
    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1
    mov r0, lr                      @ pass the fault address stored in LR by the fault handler.
    mov r1, r9                      @ pass Thread::Current
    bl  artThrowNullPointerExceptionFromSignal  @ (fault_address, Thread*)
END art_quick_throw_null_pointer_exception_from_signal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/r1.
     *
     * The helper will attempt to locate the target and return a 64-bit result in r0/r1 consisting
     * of the target Method* in r0 and method->code_ in r1.
     *
     * If unsuccessful, the helper will return null/null. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
     * pointing back to the original caller.
     *
     * Clobbers IP (R12).
     */
.macro INVOKE_TRAMPOLINE_BODY cxx_name
    .extern \cxx_name
    SETUP_SAVE_REFS_AND_ARGS_FRAME r2     @ save callee saves in case allocation triggers GC
    mov    r2, r9                         @ pass Thread::Current
    mov    r3, sp
    bl     \cxx_name                      @ (method_idx, this, Thread*, SP)
    mov    r12, r1                        @ save Method*->code_
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    cbz    r0, 1f                         @ did we find the target? if not go to exception delivery
    bx     r12                            @ tail call to target
1:
    DELIVER_PENDING_EXCEPTION
.endm
.macro INVOKE_TRAMPOLINE c_name, cxx_name
ENTRY \c_name
    INVOKE_TRAMPOLINE_BODY \cxx_name
END \c_name
.endm

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck

    /*
     * Quick invocation stub internal.
     * On entry:
     *   r0 = method pointer
     *   r1 = argument array or null for no argument methods
     *   r2 = size of argument array in bytes
     *   r3 = (managed) thread pointer
     *   [sp] = JValue* result
     *   [sp + 4] = result_in_float
     *   [sp + 8] = core register argument array
     *   [sp + 12] = fp register argument array
     *  +-------------------------+
     *  | uint32_t* fp_reg_args   |
     *  | uint32_t* core_reg_args |
     *  |   result_in_float       | <- Caller frame
     *  |   JValue* result        |
     *  +-------------------------+
     *  |          lr             |
     *  |          r11            |
     *  |          r10            |
     *  |          r9             |
     *  |          r8             |
     *  |          r7             |
     *  |          r6             |
     *  |          r5             |
     *  |          r4             | <- r11
     *  +-------------------------+
     *  | uint32_t out[n-1]       |
     *  |    :      :             |        Outs
     *  | uint32_t out[0]         |
     *  | StackRef<ArtMethod>     | <- SP  value=null
     *  +-------------------------+
     */
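    /*
     * Illustrative sketch (an assumed prototype derived from the register and
     * stack layout above, not quoted from a header): the C++ caller side looks
     * roughly like
     *
     *   extern "C" void art_quick_invoke_stub_internal(
     *       ArtMethod* method,        // r0
     *       uint32_t* args,           // r1
     *       uint32_t args_size,       // r2, in bytes
     *       Thread* self,             // r3
     *       JValue* result,           // [sp]
     *       uint32_t result_in_float, // [sp + 4]
     *       uint32_t* core_reg_args,  // [sp + 8]
     *       uint32_t* fp_reg_args);   // [sp + 12]
     */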
ENTRY art_quick_invoke_stub_internal
    SPILL_ALL_CALLEE_SAVE_GPRS             @ spill regs (9)
    mov    r11, sp                         @ save the stack pointer
    .cfi_def_cfa_register r11

    mov    r9, r3                          @ move managed thread pointer into r9

    add    r4, r2, #4                      @ create space for method pointer in frame
    sub    r4, sp, r4                      @ reserve & align *stack* to 16 bytes: native calling
    and    r4, #0xFFFFFFF0                 @ convention only aligns to 8B, so we have to ensure ART
    mov    sp, r4                          @ 16B alignment ourselves.

    mov    r4, r0                          @ save method*
    add    r0, sp, #4                      @ pass stack pointer + method ptr as dest for memcpy
    bl     memcpy                          @ memcpy (dest, src, bytes)
    mov    ip, #0                          @ set ip to 0
    str    ip, [sp]                        @ store null for method* at bottom of frame

    ldr    ip, [r11, #48]                  @ load fp register argument array pointer
    vldm   ip, {s0-s15}                    @ copy s0 - s15

    ldr    ip, [r11, #44]                  @ load core register argument array pointer
    mov    r0, r4                          @ restore method*
    add    ip, ip, #4                      @ skip r0
    ldm    ip, {r1-r3}                     @ copy r1 - r3

    REFRESH_MARKING_REGISTER

    ldr    ip, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]  @ get pointer to the code
    blx    ip                              @ call the method

    mov    sp, r11                         @ restore the stack pointer
    .cfi_def_cfa_register sp

    ldr    r4, [sp, #40]                   @ load result_in_float flag
    ldr    r9, [sp, #36]                   @ load the result pointer
    cmp    r4, #0
    ite    eq
    strdeq r0, [r9]                        @ store r0/r1 into result pointer
    vstrne d0, [r9]                        @ store s0-s1/d0 into result pointer

    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}               @ restore spill regs
END art_quick_invoke_stub_internal

    /*
     * On stack replacement stub.
     * On entry:
     *   r0 = stack to copy
     *   r1 = size of stack
     *   r2 = pc to call
     *   r3 = JValue* result
     *   [sp] = shorty
     *   [sp + 4] = thread
     */
ENTRY art_quick_osr_stub
    SPILL_ALL_CALLEE_SAVE_GPRS             @ Spill regs (9)
    SAVE_SIZE=9*4
    mov    r11, sp                         @ Save the stack pointer
    .cfi_def_cfa r11, SAVE_SIZE            @ CFA = r11 + SAVE_SIZE
    .cfi_remember_state
    mov    r10, r1                         @ Save size of stack
    ldr    r9, [r11, #40]                  @ Move managed thread pointer into r9
    REFRESH_MARKING_REGISTER
    mov    r6, r2                          @ Save the pc to call
    sub    r7, sp, #12                     @ Reserve space for stack pointer,
                                           @    JValue* result, and ArtMethod* slot.
    and    r7, #0xFFFFFFF0                 @ Align stack pointer
    mov    sp, r7                          @ Update stack pointer
    str    r11, [sp, #4]                   @ Save old stack pointer
    str    r3, [sp, #8]                    @ Save JValue* result
    mov    ip, #0
    str    ip, [sp]                        @ Store null for ArtMethod* at bottom of frame
    // r11 isn't properly spilled in the OSR method, so we need to use a DWARF expression.
    // NB: the CFI must be before the call since this is the address gdb will look up.
    // NB: gdb expects that cfa_expression returns the CFA value (not an address where it's stored).
    .cfi_escape                            /* CFA = [sp + 4] + SAVE_SIZE */ \
      0x0f, 6,                             /* DW_CFA_def_cfa_expression(len) */ \
      0x92, 13, 4,                         /* DW_OP_bregx(reg,offset) */ \
      0x06,                                /* DW_OP_deref */ \
      0x23, SAVE_SIZE                      /* DW_OP_plus_uconst(val) */
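    // Illustrative decode of the expression above (informational only):
    // DW_OP_bregx(13, 4) pushes sp + 4, DW_OP_deref replaces it with the old
    // stack pointer saved at [sp, #4], and DW_OP_plus_uconst adds SAVE_SIZE,
    // yielding CFA = *(sp + 4) + SAVE_SIZE, i.e. the caller's CFA.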
    bl     .Losr_entry                     @ Call the method
    ldr    r10, [sp, #8]                   @ Restore JValue* result
    ldr    sp, [sp, #4]                    @ Restore saved stack pointer
    .cfi_def_cfa sp, SAVE_SIZE             @ CFA = sp + SAVE_SIZE
    ldr    r4, [sp, #36]                   @ load shorty
    ldrb   r4, [r4, #0]                    @ load return type
    cmp    r4, #68                         @ Test if result type char == 'D'.
    beq    .Losr_fp_result
    cmp    r4, #70                         @ Test if result type char == 'F'.
    beq    .Losr_fp_result
    strd r0, [r10]                         @ Store r0/r1 into result pointer
    b    .Losr_exit
.Losr_fp_result:
    vstr d0, [r10]                         @ Store s0-s1/d0 into result pointer
.Losr_exit:
    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.Losr_entry:
    .cfi_restore_state
    .cfi_def_cfa r11, SAVE_SIZE            @ CFA = r11 + SAVE_SIZE
    sub sp, sp, r10                        @ Reserve space for callee stack
    sub r10, r10, #4
    str lr, [sp, r10]                      @ Store link register per the compiler ABI
    mov r2, r10
    mov r1, r0
    mov r0, sp
    bl  memcpy                             @ memcpy (dest r0, src r1, bytes r2)
    bx r6
END art_quick_osr_stub

    /*
     * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_
     */
ARM_ENTRY art_quick_do_long_jump
    vldm r1, {s0-s31}     @ load all fprs from argument fprs_
    ldr  r2, [r0, #60]    @ r2 = r15 (PC from gprs_ 60=4*15)
    ldr  r14, [r0, #56]   @ (LR from gprs_ 56=4*14)
    add  r0, r0, #12      @ increment r0 to skip gprs_[0..2] 12=4*3
    ldm  r0, {r3-r13}     @ load remaining gprs from argument gprs_
    REFRESH_MARKING_REGISTER
    ldr  r0, [r0, #-12]   @ load r0 value
    mov  r1, #0           @ clear result register r1
    bx   r2               @ do long jump
END art_quick_do_long_jump
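
    /*
     * Illustrative note: gprs_ is an array indexed by register number, so
     * gprs_[i] holds r<i> (gprs_[14] = LR at byte offset 56, gprs_[15] = PC at
     * offset 60), which is what the fixed offsets above encode. Clearing r1 is
     * presumably defensive: a long jump carries at most a 32-bit value in r0,
     * so the would-be high word of a 64-bit result is zeroed.
     */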

    /*
     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
     * failure.
     */
TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

    /*
     * Entry from managed code that calls artLockObjectFromCode, may block for GC. r0 holds the
     * possibly null object to lock.
     */
    .extern artLockObjectFromCode
ENTRY art_quick_lock_object
    cbz    r0, .Lslow_lock
.Lretry_lock:
    ldr    r2, [r9, #THREAD_ID_OFFSET]
    ldrex  r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    mov    r3, r1
    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits
    cbnz   r3, .Lnot_unlocked         @ already thin locked
    @ unlocked case - r1: original lock word that's zero except for the read barrier bits.
    orr    r2, r1, r2                 @ r2 holds thread id with count of 0 and preserved read barrier bits
    strex  r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    cbnz   r3, .Llock_strex_fail      @ store failed, retry
    dmb    ish                        @ full (LoadLoad|LoadStore) memory barrier
    bx lr
.Lnot_unlocked:  @ r1: original lock word, r2: thread_id with count of 0 and zero read barrier bits
    lsr    r3, r1, LOCK_WORD_STATE_SHIFT
    cbnz   r3, .Lslow_lock            @ if either of the top two bits are set, go slow path
    eor    r2, r1, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
    uxth   r2, r2                     @ zero top 16 bits
    cbnz   r2, .Lslow_lock            @ thread ids don't match -> contention, go to slow path;
                                      @ else they match -> recursive lock, fall through
    mov    r3, r1                     @ copy the lock word to check count overflow.
    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits.
    add    r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ increment count in lock word placing in r2 to check overflow
    lsr    r3, r2, #LOCK_WORD_GC_STATE_SHIFT    @ if the first gc state bit is set, we overflowed.
    cbnz   r3, .Lslow_lock            @ if we overflow the count go slow path
    add    r2, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ increment count for real
    strex  r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits
    cbnz   r3, .Llock_strex_fail      @ strex failed, retry
    bx lr
.Llock_strex_fail:
    b      .Lretry_lock               @ retry
.Lslow_lock:
    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case we block
    mov    r1, r9                     @ pass Thread::Current
    bl     artLockObjectFromCode      @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_lock_object
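
    /*
     * Illustrative note: the thin lock word manipulated above is roughly laid
     * out as follows (see LockWord in runtime/lock_word.h for the
     * authoritative definition):
     *
     *   bits [31:30] state (thin lock / fat lock / hash code / forwarding)
     *   bits [29:28] gc / read barrier state (preserved by the masks above)
     *   bits [27:16] recursive lock count
     *   bits [15: 0] owner thread id
     */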

ENTRY art_quick_lock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case we block
    mov    r1, r9                     @ pass Thread::Current
    bl     artLockObjectFromCode      @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_lock_object_no_inline

    /*
     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
     * r0 holds the possibly null object to unlock.
     */
    .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
    cbz    r0, .Lslow_unlock
.Lretry_unlock:
#ifndef USE_READ_BARRIER
    ldr    r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    ldrex  r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ Need to use atomic instructions for read barrier
#endif
    lsr    r2, r1, #LOCK_WORD_STATE_SHIFT
    cbnz   r2, .Lslow_unlock          @ if either of the top two bits are set, go slow path
    ldr    r2, [r9, #THREAD_ID_OFFSET]
    mov    r3, r1                     @ copy lock word to check thread id equality
    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits
    eor    r3, r3, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
    uxth   r3, r3                     @ zero top 16 bits
    cbnz   r3, .Lslow_unlock          @ if thread ids don't match, go slow path
    mov    r3, r1                     @ copy lock word to detect transition to unlocked
    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits
    cmp    r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
    bpl    .Lrecursive_thin_unlock
    @ transition to unlocked
    mov    r3, r1
    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED  @ r3: zero except for the preserved gc bits
    dmb    ish                        @ full (LoadStore|StoreStore) memory barrier
#ifndef USE_READ_BARRIER
    str    r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    strex  r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits
    cbnz   r2, .Lunlock_strex_fail    @ store failed, retry
#endif
    bx     lr
.Lrecursive_thin_unlock:  @ r1: original lock word
    sub    r1, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ decrement count
#ifndef USE_READ_BARRIER
    str    r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    strex  r2, r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits
    cbnz   r2, .Lunlock_strex_fail    @ store failed, retry
#endif
    bx     lr
.Lunlock_strex_fail:
    b      .Lretry_unlock             @ retry
.Lslow_unlock:
    @ save callee saves in case exception allocation triggers GC
    SETUP_SAVE_REFS_ONLY_FRAME r1
    mov    r1, r9                     @ pass Thread::Current
    bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_unlock_object

ENTRY art_quick_unlock_object_no_inline
    @ save callee saves in case exception allocation triggers GC
    SETUP_SAVE_REFS_ONLY_FRAME r1
    mov    r1, r9                     @ pass Thread::Current
    bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_unlock_object_no_inline

    /*
     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
     * artThrowClassCastExceptionForObject.
     */
    .extern artInstanceOfFromCode
    .extern artThrowClassCastExceptionForObject
ENTRY art_quick_check_instance_of
    push {r0-r2, lr}                    @ save arguments, padding (r2) and link register
    .cfi_adjust_cfa_offset 16
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset lr, 12
    bl artInstanceOfFromCode
    cbz    r0, .Lthrow_class_cast_exception
    pop {r0-r2, pc}

.Lthrow_class_cast_exception:
    pop {r0-r2, lr}
    .cfi_adjust_cfa_offset -16
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore lr

    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2       @ save all registers as basis for long jump context
    mov r2, r9                      @ pass Thread::Current
    bl  artThrowClassCastExceptionForObject  @ (Object*, Class*, Thread*)
    bkpt
END art_quick_check_instance_of

// Restore rReg's value from [sp, #offset] if rReg is not the same as rExclude.
.macro POP_REG_NE rReg, offset, rExclude
    .ifnc \rReg, \rExclude
        ldr \rReg, [sp, #\offset]   @ restore rReg
        .cfi_restore \rReg
    .endif
.endm

// Save rReg's value to [sp, #offset].
.macro PUSH_REG rReg, offset
    str \rReg, [sp, #\offset]       @ save rReg
    .cfi_rel_offset \rReg, \offset
.endm

    /*
     * Macro to insert read barrier, only used in art_quick_aput_obj.
     * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
     */
.macro READ_BARRIER rDest, rObj, offset
#ifdef USE_READ_BARRIER
    push {r0-r3, ip, lr}            @ 6 words for saved registers (used in art_quick_aput_obj)
    .cfi_adjust_cfa_offset 24
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset ip, 16
    .cfi_rel_offset lr, 20
    sub sp, #8                      @ push padding
    .cfi_adjust_cfa_offset 8
    @ mov r0, \rRef                 @ pass ref in r0 (no-op for now since parameter ref is unused)
    .ifnc \rObj, r1
        mov r1, \rObj               @ pass rObj
    .endif
    mov r2, #\offset                @ pass offset
    bl artReadBarrierSlow           @ artReadBarrierSlow(ref, rObj, offset)
    @ No need to unpoison return value in r0, artReadBarrierSlow() would do the unpoisoning.
    .ifnc \rDest, r0
        mov \rDest, r0              @ save return value in rDest
    .endif
    add sp, #8                      @ pop padding
    .cfi_adjust_cfa_offset -8
    POP_REG_NE r0, 0, \rDest        @ conditionally restore saved registers
    POP_REG_NE r1, 4, \rDest
    POP_REG_NE r2, 8, \rDest
    POP_REG_NE r3, 12, \rDest
    POP_REG_NE ip, 16, \rDest
    add sp, #20
    .cfi_adjust_cfa_offset -20
    pop {lr}                        @ restore lr
    .cfi_adjust_cfa_offset -4
    .cfi_restore lr
#else
    ldr \rDest, [\rObj, #\offset]
    UNPOISON_HEAP_REF \rDest
#endif  // USE_READ_BARRIER
.endm
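
    /*
     * Illustrative sketch of the slow-path helper's assumed shape (the
     * argument order matches the r0/r1/r2 setup in the macro above):
     *
     *   extern "C" mirror::Object* artReadBarrierSlow(
     *       mirror::Object* ref, mirror::Object* obj, uint32_t offset);
     *
     * It re-reads the field at obj + offset through the read barrier and
     * returns the to-space reference in r0.
     */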

#ifdef USE_READ_BARRIER
    .extern artReadBarrierSlow
#endif
    .hidden art_quick_aput_obj
ENTRY art_quick_aput_obj
#ifdef USE_READ_BARRIER
    @ The offset to .Ldo_aput_null is too large to use cbz due to expansion from READ_BARRIER macro.
    tst r2, r2
    beq .Ldo_aput_null
#else
    cbz r2, .Ldo_aput_null
#endif  // USE_READ_BARRIER
    READ_BARRIER r3, r0, MIRROR_OBJECT_CLASS_OFFSET
    READ_BARRIER ip, r2, MIRROR_OBJECT_CLASS_OFFSET
    READ_BARRIER r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
    cmp r3, ip  @ value's type == array's component type - trivial assignability
    bne .Lcheck_assignability
.Ldo_aput:
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    POISON_HEAP_REF r2
    str r2, [r3, r1, lsl #2]
    ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
    strb r3, [r3, r0]
    blx lr
.Ldo_aput_null:
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    str r2, [r3, r1, lsl #2]
    blx lr
.Lcheck_assignability:
    push {r0-r2, lr}             @ save arguments
    .cfi_adjust_cfa_offset 16
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset lr, 12
    mov r1, ip
    mov r0, r3
    bl artIsAssignableFromCode
    cbz r0, .Lthrow_array_store_exception
    pop {r0-r2, lr}
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore lr
    .cfi_adjust_cfa_offset -16
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    POISON_HEAP_REF r2
    str r2, [r3, r1, lsl #2]
    ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
    strb r3, [r3, r0]
    blx lr
.Lthrow_array_store_exception:
    pop {r0-r2, lr}
    /* No need to repeat restore cfi directives, the ones above apply here. */
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r3
    mov r1, r2
    mov r2, r9                     @ pass Thread::Current
    bl artThrowArrayStoreException @ (Class*, Class*, Thread*)
    bkpt                           @ unreached
END art_quick_aput_obj
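
    /*
     * Illustrative note: the three instructions after each reference store
     * above implement card marking, roughly the following C (card_table being
     * the thread-local card table base loaded from r9):
     *
     *   card_table[(uintptr_t)array >> CARD_TABLE_CARD_SHIFT] =
     *       (uint8_t)(uintptr_t)card_table;  // low byte of the base = dirty value
     *
     * Storing the table base's low byte as the mark avoids loading a separate
     * "dirty" constant.
     */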

// Macro to facilitate adding new allocation entrypoints.
.macro ONE_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case of GC
    mov    r1, r9                     @ pass Thread::Current
    bl     \entrypoint     @ (uint32_t type_idx, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new allocation entrypoints.
.macro TWO_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
    mov    r2, r9                     @ pass Thread::Current
    bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new array allocation entrypoints.
.macro THREE_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r3     @ save callee saves in case of GC
    mov    r3, r9                     @ pass Thread::Current
    @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*)
    bl     \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new allocation entrypoints.
.macro FOUR_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r12    @ save callee saves in case of GC
    str    r9, [sp, #-16]!            @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl     \entrypoint
    add    sp, #16                    @ strip the extra frame
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm
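
    /*
     * Illustrative note: in FOUR_ARG_DOWNCALL the four real arguments already
     * occupy r0-r3, so Thread::Current becomes the fifth argument and, per the
     * AAPCS, is passed on the stack. The pre-indexed "str r9, [sp, #-16]!"
     * both stores it and reserves 16 bytes, keeping sp 16-byte aligned.
     */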

// Macro for string and type resolution and initialization.
.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_EVERYTHING_FRAME r1, \runtime_method_offset    @ save everything in case of GC
    mov    r1, r9                     @ pass Thread::Current
    bl     \entrypoint                @ (uint32_t index, Thread*)
    cbz    r0, 1f                     @ If result is null, deliver the OOME.
    .cfi_remember_state
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
    REFRESH_MARKING_REGISTER
    bx     lr
    .cfi_restore_state
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END \name
.endm

.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
.endm

ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_type, artInitializeTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode

// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.

    /*
     * Called by managed code to resolve a static field and load a non-wide value.
     */
ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    /*
     * Called by managed code to resolve a static field and load a 64-bit primitive value.
     */
    .extern artGet64StaticFromCompiledCode
ENTRY art_quick_get64_static
    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
    mov    r1, r9                        @ pass Thread::Current
    bl     artGet64StaticFromCompiledCode        @ (uint32_t field_idx, Thread*)
    ldr    r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    cbnz   r2, 1f                        @ if there is a pending exception, deliver it
    bx     lr                            @ otherwise return the value in r0/r1
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_get64_static

    /*
     * Called by managed code to resolve an instance field and load a non-wide value.
     */
TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    /*
     * Called by managed code to resolve an instance field and load a 64-bit primitive value.
     */
    .extern artGet64InstanceFromCompiledCode
ENTRY art_quick_get64_instance
    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
    mov    r2, r9                        @ pass Thread::Current
    bl     artGet64InstanceFromCompiledCode      @ (field_idx, Object*, Thread*)
    ldr    r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    cbnz   r2, 1f                        @ if there is a pending exception, deliver it
    bx     lr                            @ otherwise return the value in r0/r1
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_get64_instance

    /*
     * Called by managed code to resolve a static field and store a value.
     */
TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

    /*
     * Called by managed code to resolve an instance field and store a non-wide value.
     */
THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

    /*
     * Called by managed code to resolve an instance field and store a wide value.
     */
    .extern artSet64InstanceFromCompiledCode
ENTRY art_quick_set64_instance
    SETUP_SAVE_REFS_ONLY_FRAME r12       @ save callee saves in case of GC
                                         @ r2:r3 contain the wide argument
    str    r9, [sp, #-16]!               @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl     artSet64InstanceFromCompiledCode      @ (field_idx, Object*, new_val, Thread*)
    add    sp, #16                       @ release out args
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_set64_instance

    .extern artSet64StaticFromCompiledCode
ENTRY art_quick_set64_static
    SETUP_SAVE_REFS_ONLY_FRAME r12        @ save callee saves in case of GC
                                          @ r2:r3 contain the wide argument
    str    r9, [sp, #-16]!                @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl     artSet64StaticFromCompiledCode @ (field_idx, new_val, Thread*)
    add    sp, #16                        @ release out args
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME          @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_set64_static

// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
// Comment out allocators that have arm specific asm.
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)

// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
//
// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
1176.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized
1177ENTRY \c_name
1178    // Fast path rosalloc allocation.
1179    // r0: type/return value, r9: Thread::Current
1180    // r1, r2, r3, r12: free.
1181    ldr    r3, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]     // Check if the thread local
1182                                                              // allocation stack has room.
1183                                                              // TODO: consider using ldrd.
1184    ldr    r12, [r9, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
1185    cmp    r3, r12
1186    bhs    .Lslow_path\c_name
1187
1188    ldr    r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3)
1189    cmp    r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
1190                                                              // local allocation. Also does the
1191                                                              // initialized and finalizable checks.
1192    // When isInitialized == 0, then the class is potentially not yet initialized.
1193    // If the class is not yet initialized, the object size will be very large to force the branch
1194    // below to be taken.
1195    //
1196    // See InitializeClassVisitors in class-inl.h for more details.
1197    bhs    .Lslow_path\c_name
1198                                                              // Compute the rosalloc bracket index
1199                                                              // from the size. Since the size is
1200                                                              // already aligned we can combine the
1201                                                              // two shifts together.
1202    add    r12, r9, r3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
1203                                                              // Subtract pointer size since ther
1204                                                              // are no runs for 0 byte allocations
1205                                                              // and the size is already aligned.
1206                                                              // Load the rosalloc run (r12)
1207    ldr    r12, [r12, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)]
1208                                                              // Load the free list head (r3). This
1209                                                              // will be the return val.
1210    ldr    r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
1211    cbz    r3, .Lslow_path\c_name
1212    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
1213    ldr    r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
1214                                                              // and update the list head with the
1215                                                              // next pointer.
1216    str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
1217                                                              // Store the class pointer in the
1218                                                              // header. This also overwrites the
1219                                                              // next pointer. The offsets are
1220                                                              // asserted to match.
1221#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
1222#error "Class pointer needs to overwrite next pointer."
1223#endif
1224    POISON_HEAP_REF r0
1225    str    r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
1226                                                              // Push the new object onto the thread
1227                                                              // local allocation stack and
1228                                                              // increment the thread local
1229                                                              // allocation stack top.
1230    ldr    r1, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
1231    str    r3, [r1], #COMPRESSED_REFERENCE_SIZE               // (Increment r1 as a side effect.)
1232    str    r1, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
1233                                                              // Decrement the size of the free list
1234
1235    // After this "STR" the object is published to the thread local allocation stack,
1236    // and it will be observable from a runtime-internal (e.g. Heap::VisitObjects) point of view.
1237    // It is not yet visible to the running (user) compiled code until after the return.
1238    //
1239    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
1240    // the state of the allocation stack slot. It can be a pointer to one of:
1241    // 0) A null entry, because the stack was bumped but the new pointer wasn't written yet.
1242    //       (The stack's initial state is all null pointers.)
1243    // 1) A partially valid object, whose class pointer field still holds the next free slot.
1244    // 2) A fully valid object, with a valid class pointer pointing to a real class.
1245    // Other states are not allowed.
1246    //
1247    // Such an object is invalid only temporarily; it will eventually become valid.
1248    // The internal runtime code simply checks whether the object is null or only partially
1249    // valid, and if so ignores it.
1250    //
1251    // (Note: The actual check is done by seeing if a non-null object has a class pointer pointing
1252    // to ClassClass, and that the ClassClass's class pointer is self-cyclic. A rosalloc free slot
1253    // "next" pointer is not-cyclic.)
1254    //
1255    // See also b/28790624 for a listing of CLs dealing with this race.
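    // A minimal C++ sketch (illustrative only, not part of this file) of how a
    // runtime-internal visitor can tolerate the transient states above; the
    // helper names are hypothetical:
    //
    //   for (mirror::Object** slot : thread_local_alloc_stack) {
    //     mirror::Object* obj = *slot;
    //     if (obj == nullptr) continue;       // State 0: bumped, not yet written.
    //     if (!HasValidClass(obj)) continue;  // State 1: class field still holds
    //                                         //          the rosalloc "next" pointer.
    //     Visit(obj);                         // State 2: fully published.
    //   }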
1256    ldr    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
1257    sub    r1, #1
1258                                                              // TODO: consider combining this store
1259                                                              // and the list head store above using
1260                                                              // strd.
1261    str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
1262
1263    mov    r0, r3                                             // Set the return value and return.
1264.if \isInitialized == 0
1265    // This barrier is only necessary when the allocation also requires
1266    // a class initialization check.
1267    //
1268    // If the class is already observably initialized, then new-instance allocations are protected
1269    // from publishing by the compiler which inserts its own StoreStore barrier.
1270    dmb    ish
1271    // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
1272    // they should happen-after the implicit initialization check.
1273    //
1274    // TODO: Remove this dmb for class initialization checks (b/36692143) by introducing
1275    // a new observably-initialized class state.
1276.endif
1277    bx     lr
1278
1279.Lslow_path\c_name:
1280    SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
1281    mov    r1, r9                     @ pass Thread::Current
1282    bl     \cxx_name                  @ (mirror::Class* cls, Thread*)
1283    RESTORE_SAVE_REFS_ONLY_FRAME
1284    REFRESH_MARKING_REGISTER
1285    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1286END \c_name
1287.endm
1288
1289ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0
1290ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1
1291
1292// The common fast path code for art_quick_alloc_object_resolved/initialized_tlab
1293// and art_quick_alloc_object_resolved/initialized_region_tlab.
1294//
1295// r0: type, r9: Thread::Current; r1, r2, r3, r12: free.
1296// Need to preserve r0 for the slow path.
1297//
1298// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
1299// If isInitialized=0 the compiler can only assume it's been at least resolved.
1300.macro ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH slowPathLabel isInitialized
1301                                                             // Load thread_local_pos (r12) and
1302                                                             // thread_local_end (r3) with ldrd.
1303                                                             // Check constraints for ldrd.
1304#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
1305#error "Thread::thread_local_pos/end must be consecutive and 8-byte aligned for performance"
1306#endif
1307    ldrd   r12, r3, [r9, #THREAD_LOCAL_POS_OFFSET]
1308    sub    r12, r3, r12                                       // Compute the remaining buf size.
1309    ldr    r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3).
1310    cmp    r3, r12                                            // Check if it fits.
1311    // When isInitialized == 0, then the class is potentially not yet initialized.
1312    // If the class is not yet initialized, the object size will be very large to force the branch
1313    // below to be taken.
1314    //
1315    // See InitializeClassVisitors in class-inl.h for more details.
1316    bhi    \slowPathLabel
1317    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
1318                                                              // Reload old thread_local_pos (r2)
1319                                                              // for the return value.
1320    ldr    r2, [r9, #THREAD_LOCAL_POS_OFFSET]
1321    add    r1, r2, r3
1322    str    r1, [r9, #THREAD_LOCAL_POS_OFFSET]                 // Store new thread_local_pos.
1323    // After this "STR" the object is published to the thread local allocation stack,
1324    // and it will be observable from a runtime-internal (e.g. Heap::VisitObjects) point of view.
1325    // It is not yet visible to the running (user) compiled code until after the return.
1326    //
1327    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
1328    // the state of the object. It can be either:
1329    // 1) A partially valid object, with a null class pointer
1330    //       (because the initial state of TLAB buffers is all 0s/nulls).
1331    // 2) A fully valid object, with a valid class pointer pointing to a real class.
1332    // Other states are not allowed.
1333    //
1334    // Such an object is invalid only temporarily; it will eventually become valid.
1335    // The internal runtime code simply checks whether the object is null or only partially
1336    // valid, and if so ignores it.
1337    //
1338    // (Note: The actual check is done by checking that the object's class pointer is non-null.
1339    // Also, unlike rosalloc, the object can never be observed as null).
1340    ldr    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]             // Increment thread_local_objects.
1341    add    r1, r1, #1
1342    str    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]
1343    POISON_HEAP_REF r0
1344    str    r0, [r2, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
1345                                                              // The fence below (when emitted) is
1346                                                              // "ish", not "ishst", so that code
1347                                                              // after this allocation site sees
1348                                                              // the right values in the class's fields.
1349    mov    r0, r2
1350.if \isInitialized == 0
1351    // This barrier is only necessary when the allocation also requires
1352    // a class initialization check.
1353    //
1354    // If the class is already observably initialized, then new-instance allocations are protected
1355    // from publishing by the compiler which inserts its own StoreStore barrier.
1356    dmb    ish
1357    // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
1358    // they should happen-after the implicit initialization check.
1359    //
1360    // TODO: Remove dmb for class initialization checks (b/36692143)
1361.endif
1362    bx     lr
1363.endm
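// A minimal C++ sketch (illustrative only) of the bump-pointer fast path above;
// the field and helper names are hypothetical:
//
//   mirror::Object* AllocObjectTlab(Thread* self, mirror::Class* klass) {
//     size_t size = klass->object_size_alloc_fast_path_;  // "Huge" if uninitialized.
//     if (size > static_cast<size_t>(self->tlab_end_ - self->tlab_pos_)) {
//       return nullptr;  // Take the slow path.
//     }
//     mirror::Object* obj = reinterpret_cast<mirror::Object*>(self->tlab_pos_);
//     self->tlab_pos_ += size;
//     self->tlab_objects_ += 1;
//     obj->klass_ = klass;  // Publish: TLAB memory is pre-zeroed, so a non-null
//                           // class pointer is what marks the object as valid.
//     return obj;
//   }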
1364
1365// The common code for art_quick_alloc_object_*_tlab and art_quick_alloc_object_*_region_tlab.
1366.macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized
1367ENTRY \name
1368    // Fast path tlab allocation.
1369    // r0: type, r9: Thread::Current
1370    // r1, r2, r3, r12: free.
1371    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lslow_path\name, \isInitialized
1372.Lslow_path\name:
1373    SETUP_SAVE_REFS_ONLY_FRAME r2                             // Save callee saves in case of GC.
1374    mov    r1, r9                                             // Pass Thread::Current.
1375    bl     \entrypoint                                        // (mirror::Class* klass, Thread*)
1376    RESTORE_SAVE_REFS_ONLY_FRAME
1377    REFRESH_MARKING_REGISTER
1378    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1379END \name
1380.endm
1381
1382GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0
1383GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1
1384GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0
1385GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1
1386
1387
1388// The common fast path code for art_quick_alloc_array_resolved/initialized_tlab
1389// and art_quick_alloc_array_resolved/initialized_region_tlab.
1390//
1391// r0: type, r1: component_count, r2: total_size, r9: Thread::Current; r3, r12: free.
1392// Need to preserve r0 and r1 for the slow path.
1393.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel
1394    and    r2, r2, #OBJECT_ALIGNMENT_MASK_TOGGLED             // Apply alignment mask:
1395                                                              // size = (size + 7) & ~7.
1396
1397                                                              // Load thread_local_pos (r3) and
1398                                                              // thread_local_end (r12) with ldrd.
1399                                                              // Check constraints for ldrd.
1400#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
1401#error "Thread::thread_local_pos/end must be consecutive and 8-byte aligned for performance"
1402#endif
1403    ldrd   r3, r12, [r9, #THREAD_LOCAL_POS_OFFSET]
1404    sub    r12, r12, r3                                       // Compute the remaining buf size.
1405    cmp    r2, r12                                            // Check if the total_size fits.
1406    // The array class is always initialized here, so unlike new-instance,
1407    // the size check does not double as an initialization check.
1408    bhi    \slowPathLabel
1409    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
1410    add    r2, r2, r3
1411    str    r2, [r9, #THREAD_LOCAL_POS_OFFSET]                 // Store new thread_local_pos.
1412    ldr    r2, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]             // Increment thread_local_objects.
1413    add    r2, r2, #1
1414    str    r2, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]
1415    POISON_HEAP_REF r0
1416    str    r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
1417    str    r1, [r3, #MIRROR_ARRAY_LENGTH_OFFSET]              // Store the array length.
1418                                                              // No fence is emitted here; the
1419                                                              // compiler covers publication of
1420                                                              // the new array (see the notes
1421                                                              // below).
1422    mov    r0, r3
1423    // new-array is special. The class is loaded and immediately goes to the Initialized state
1424    // before it is published. Therefore the only fence needed is for the publication of the object.
1425    // See ClassLinker::CreateArrayClass() for more details.
1426
1427    // For publication of the new array, we don't need a 'dmb ishst' here:
1428    // the compiler generates 'dmb ishst' for all new-array instructions.
1429    bx     lr
1430.endm
1431
1432.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
1433ENTRY \name
1434    // Fast path array allocation for TLAB and region TLAB allocation.
1435    // r0: mirror::Class* type
1436    // r1: int32_t component_count
1437    // r9: thread
1438    // r2, r3, r12: free.
1439    \size_setup .Lslow_path\name
1440    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name
1441.Lslow_path\name:
1442    // r0: mirror::Class* klass
1443    // r1: int32_t component_count
1444    // r2: Thread* self
1445    SETUP_SAVE_REFS_ONLY_FRAME r2  // save callee saves in case of GC
1446    mov    r2, r9                  // pass Thread::Current
1447    bl     \entrypoint
1448    RESTORE_SAVE_REFS_ONLY_FRAME
1449    REFRESH_MARKING_REGISTER
1450    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1451END \name
1452.endm
1453
1454.macro COMPUTE_ARRAY_SIZE_UNKNOWN slow_path
1455    bkpt                                                    // We should never enter here.
1456                                                            // Code below is for reference.
1457                                                            // Possibly a large object, go slow.
1458                                                            // Also does negative array size check.
1459    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_WIDE_ARRAY_DATA_OFFSET) / 8)
1460    cmp r1, r2
1461    bhi \slow_path
1462                                                            // Array classes are never finalizable
1463                                                            // or uninitialized, no need to check.
1464    ldr    r3, [r0, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]    // Load component type
1465    UNPOISON_HEAP_REF r3
1466    ldr    r3, [r3, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
1467    lsr    r3, r3, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT         // Component size shift is in high 16
1468                                                            // bits.
1469    lsl    r2, r1, r3                                       // Calculate data size
1470                                                            // Add array data offset and alignment.
1471    add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1472#if MIRROR_WIDE_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
1473#error Long array data offset must be 4 greater than int array data offset.
1474#endif
1475
1476    add    r3, r3, #1                                       // Add 4 to the size only if the
1477                                                            // component size shift is 3, i.e.
1478                                                            // 64-bit components: (shift+1)&4
1479    and    r3, r3, #4                                       // is non-zero only for shift 3.
1480    add    r2, r2, r3
1481.endm
1482
1483.macro COMPUTE_ARRAY_SIZE_8 slow_path
1484    // Possibly a large object, go slow.
1485    // Also does negative array size check.
1486    movw r2, #(MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET)
1487    cmp r1, r2
1488    bhi \slow_path
1489    // Add array data offset and alignment.
1490    add    r2, r1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1491.endm
1492
1493.macro COMPUTE_ARRAY_SIZE_16 slow_path
1494    // Possibly a large object, go slow.
1495    // Also does negative array size check.
1496    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 2)
1497    cmp r1, r2
1498    bhi \slow_path
1499    lsl    r2, r1, #1
1500    // Add array data offset and alignment.
1501    add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1502.endm
1503
1504.macro COMPUTE_ARRAY_SIZE_32 slow_path
1505    // Possibly a large object, go slow.
1506    // Also does negative array size check.
1507    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 4)
1508    cmp r1, r2
1509    bhi \slow_path
1510    lsl    r2, r1, #2
1511    // Add array data offset and alignment.
1512    add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1513.endm
1514
1515.macro COMPUTE_ARRAY_SIZE_64 slow_path
1516    // Possibly a large object, go slow.
1517    // Also does negative array size check.
1518    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_LONG_ARRAY_DATA_OFFSET) / 8)
1519    cmp r1, r2
1520    bhi \slow_path
1521    lsl    r2, r1, #3
1522    // Add array data offset and alignment.
1523    add    r2, r2, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1524.endm
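// Worked example (illustrative) for COMPUTE_ARRAY_SIZE_32 with an int[10],
// assuming MIRROR_INT_ARRAY_DATA_OFFSET == 12 and OBJECT_ALIGNMENT_MASK == 7:
//   r2 = (10 << 2) + 12 + 7 = 59,
// and the fast path's 'and r2, r2, #OBJECT_ALIGNMENT_MASK_TOGGLED' rounds this
// down to 56: the 52 bytes of header, length and data, 8-byte aligned.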
1525
1526// TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm, remove
1527// the entrypoint once all backends have been updated to use the size variants.
1528GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
1529GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
1530GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
1531GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
1532GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
1533GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
1534GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
1535GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
1536GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
1537GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
1538
1539    /*
1540     * Called by managed code when the value in rSUSPEND has been decremented to 0.
1541     */
1542    .extern artTestSuspendFromCode
1543ENTRY art_quick_test_suspend
1544    SETUP_SAVE_EVERYTHING_FRAME r0, RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET @ save everything for GC stack crawl
1545    mov    r0, rSELF
1546    bl     artTestSuspendFromCode               @ (Thread*)
1547    RESTORE_SAVE_EVERYTHING_FRAME
1548    REFRESH_MARKING_REGISTER
1549    bx     lr
1550END art_quick_test_suspend
1551
1552ENTRY art_quick_implicit_suspend
1553    mov    r0, rSELF
1554    SETUP_SAVE_REFS_ONLY_FRAME r1             @ save callee saves for stack crawl
1555    bl     artTestSuspendFromCode             @ (Thread*)
1556    RESTORE_SAVE_REFS_ONLY_FRAME
1557    REFRESH_MARKING_REGISTER
1558    bx     lr
1559END art_quick_implicit_suspend
1560
1561    /*
1562     * Called by managed code that is attempting to call a method on a proxy class. On entry
1563     * r0 holds the proxy method and r1 holds the receiver; r2 and r3 may contain arguments. The
1564     * frame size of the invoked proxy method agrees with a ref and args callee save frame.
1565     */
1566     .extern artQuickProxyInvokeHandler
1567ENTRY art_quick_proxy_invoke_handler
1568    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
1569    mov     r2, r9                 @ pass Thread::Current
1570    mov     r3, sp                 @ pass SP
1571    blx     artQuickProxyInvokeHandler  @ (Method* proxy method, receiver, Thread*, SP)
1572    ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
1573    // Tear down the callee-save frame. Skip arg registers.
1574    add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1575    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1576    RESTORE_SAVE_REFS_ONLY_FRAME
1577    REFRESH_MARKING_REGISTER
1578    cbnz    r2, 1f                 @ deliver the exception if one is pending
1579    vmov    d0, r0, r1             @ store into fpr, for when it's an fpr return
1580    bx      lr                     @ return on success
15811:
1582    DELIVER_PENDING_EXCEPTION
1583END art_quick_proxy_invoke_handler
1584
1585    /*
1586     * Called to resolve an imt conflict.
1587     * r0 is the conflict ArtMethod.
1588     * r12 is a hidden argument that holds the target interface method's dex method index.
1589     *
1590     * Note that this stub writes to r0, r4, and r12.
1591     */
1592    .extern artLookupResolvedMethod
1593ENTRY art_quick_imt_conflict_trampoline
1594    push    {r1-r2}
1595    .cfi_adjust_cfa_offset (2 * 4)
1596    .cfi_rel_offset r1, 0
1597    .cfi_rel_offset r2, 4
1598    ldr     r4, [sp, #(2 * 4)]  // Load referrer.
1599    ldr     r2, [r0, #ART_METHOD_JNI_OFFSET_32]  // Load ImtConflictTable
1600    // Load the declaring class (without read barrier) and access flags (for obsolete method check).
1601    // The obsolete flag is set while threads are suspended, so we do not need an acquire operation here.
1602#if ART_METHOD_ACCESS_FLAGS_OFFSET != ART_METHOD_DECLARING_CLASS_OFFSET + 4
1603#error "Expecting declaring class and access flags to be consecutive for LDRD."
1604#endif
1605    ldrd    r0, r1, [r4, #ART_METHOD_DECLARING_CLASS_OFFSET]
1606    // If the method is obsolete, just go through the dex cache miss slow path.
1607    lsrs    r1, #(ACC_OBSOLETE_METHOD_SHIFT + 1)
1608    bcs     .Limt_conflict_trampoline_dex_cache_miss
1609    ldr     r4, [r0, #MIRROR_CLASS_DEX_CACHE_OFFSET]  // Load the DexCache (without read barrier).
1610    UNPOISON_HEAP_REF r4
1611    ubfx    r1, r12, #0, #METHOD_DEX_CACHE_HASH_BITS  // Calculate DexCache method slot index.
1612    ldr     r4, [r4, #MIRROR_DEX_CACHE_RESOLVED_METHODS_OFFSET]  // Load the resolved methods.
1613    add     r4, r4, r1, lsl #(POINTER_SIZE_SHIFT + 1)  // Load DexCache method slot address.
1614
1615// FIXME: Configure the build to use the faster code when appropriate.
1616//        Currently we fall back to the slower version.
1617#if HAS_ATOMIC_LDRD
1618    ldrd    r0, r1, [r4]
1619#else
1620    push    {r3}
1621    .cfi_adjust_cfa_offset 4
1622    .cfi_rel_offset r3, 0
1623.Limt_conflict_trampoline_retry_load:
1624    ldrexd  r0, r1, [r4]
1625    strexd  r3, r0, r1, [r4]
1626    cmp     r3, #0
1627    bne     .Limt_conflict_trampoline_retry_load
1628    pop     {r3}
1629    .cfi_adjust_cfa_offset -4
1630    .cfi_restore r3
1631#endif
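    // The LDREXD/STREXD loop above is the usual ARMv7 idiom for an atomic
    // 64-bit load: a successful STREXD of the values just read confirms that
    // the LDREXD observed the pair as a single atomic snapshot. Roughly
    // equivalent C++ (illustrative; 'slot' stands for the DexCache method
    // slot address in r4):
    //   uint64_t v = reinterpret_cast<std::atomic<uint64_t>*>(slot)
    //                    ->load(std::memory_order_relaxed);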
1632
1633    ldr     r4, [r2]  // Load first entry in ImtConflictTable.
1634    cmp     r1, r12   // Compare method index to see if we had a DexCache method hit.
1635    bne     .Limt_conflict_trampoline_dex_cache_miss
1636.Limt_table_iterate:
1637    cmp     r4, r0
1638    // Branch if found. Benchmarks have shown doing a branch here is better.
1639    beq     .Limt_table_found
1640    // If the entry is null, the interface method is not in the ImtConflictTable.
1641    cbz     r4, .Lconflict_trampoline
1642    // Iterate over the entries of the ImtConflictTable.
1643    ldr     r4, [r2, #(2 * __SIZEOF_POINTER__)]!
1644    b .Limt_table_iterate
1645.Limt_table_found:
1646    // We successfully hit an entry in the table. Load the target method
1647    // and jump to it.
1648    ldr     r0, [r2, #__SIZEOF_POINTER__]
1649    .cfi_remember_state
1650    pop     {r1-r2}
1651    .cfi_adjust_cfa_offset -(2 * 4)
1652    .cfi_restore r1
1653    .cfi_restore r2
1654    ldr     pc, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
1655    .cfi_restore_state
1656.Lconflict_trampoline:
1657    // Call the runtime stub to populate the ImtConflictTable and jump to the
1658    // resolved method.
1659    .cfi_remember_state
1660    pop     {r1-r2}
1661    .cfi_adjust_cfa_offset -(2 * 4)
1662    .cfi_restore r1
1663    .cfi_restore r2
1664    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
1665    .cfi_restore_state
1666.Limt_conflict_trampoline_dex_cache_miss:
1667    // We're not creating a proper runtime method frame here, so
1668    // artLookupResolvedMethod() is not allowed to walk the stack.
1669
1670    // Save ImtConflictTable (r2), remaining arg (r3), first entry (r4), return address (lr).
1671    push    {r2-r4, lr}
1672    .cfi_adjust_cfa_offset (4 * 4)
1673    .cfi_rel_offset r3, 4
1674    .cfi_rel_offset lr, 12
1675    // Save FPR args.
1676    vpush   {d0-d7}
1677    .cfi_adjust_cfa_offset (8 * 8)
1678
1679    mov     r0, ip                      // Pass method index.
1680    ldr     r1, [sp, #(8 * 8 + 6 * 4)]  // Pass referrer.
1681    bl      artLookupResolvedMethod     // (uint32_t method_index, ArtMethod* referrer)
1682
1683    // Restore FPR args.
1684    vpop    {d0-d7}
1685    .cfi_adjust_cfa_offset -(8 * 8)
1686    // Restore ImtConflictTable (r2), remaining arg (r3), first entry (r4), return address (lr).
1687    pop     {r2-r4, lr}
1688    .cfi_adjust_cfa_offset -(4 * 4)
1689    .cfi_restore r3
1690    .cfi_restore lr
1691
1692    cmp     r0, #0                  // If the method wasn't resolved,
1693    beq     .Lconflict_trampoline   //   skip the table iteration and go to artInvokeInterfaceTrampoline().
1694    b       .Limt_table_iterate
1695END art_quick_imt_conflict_trampoline
1696
1697    .extern artQuickResolutionTrampoline
1698ENTRY art_quick_resolution_trampoline
1699    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
1700    mov     r2, r9                 @ pass Thread::Current
1701    mov     r3, sp                 @ pass SP
1702    blx     artQuickResolutionTrampoline  @ (Method* called, receiver, Thread*, SP)
1703    cbz     r0, 1f                 @ if the code pointer is null, deliver the pending exception
1704    mov     r12, r0
1705    ldr     r0, [sp, #0]           @ load resolved method in r0
1706    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1707    REFRESH_MARKING_REGISTER
1708    bx      r12                    @ tail-call into actual code
17091:
1710    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1711    DELIVER_PENDING_EXCEPTION
1712END art_quick_resolution_trampoline
1713
1714    /*
1715     * Called to do a generic JNI down-call
1716     */
1717ENTRY art_quick_generic_jni_trampoline
1718    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
1719
1720    // Save rSELF.
1721    mov r11, rSELF
1722    // Save SP so that we can have static CFI info; r10 was spilled by the refs-and-args frame.
1723    mov r10, sp
1724    .cfi_def_cfa_register r10
1725
1726    sub sp, sp, #5120
1727
1728    // prepare for artQuickGenericJniTrampoline call
1729    // (Thread*,  SP)
1730    //    r0      r1   <= C calling convention
1731    //  rSELF     r10  <= where they are
1732
1733    mov r0, rSELF   // Thread*
1734    mov r1, r10
1735    blx artQuickGenericJniTrampoline  // (Thread*, sp)
1736
1737    // The C call will have registered the complete save-frame on success.
1738    // The result of the call is:
1739    // r0: pointer to native code, 0 on error.
1740    // r1: pointer to the bottom of the used area of the alloca; the stack can be restored to there.
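    // Illustrative C++ view of this protocol (a sketch, not the exact runtime
    // declaration); a 64-bit return value packs both words into r0/r1 per the
    // AAPCS, with the code pointer in the low word and the new stack bottom in
    // the high word:
    //   uint64_t artQuickGenericJniTrampoline(Thread* self, void* managed_sp);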
1741
1742    // Check for error, i.e. a null code pointer.
1743    cbz r0, .Lexception_in_native
1744
1745    // Release part of the alloca.
1746    mov sp, r1
1747
1748    // Save the code pointer
1749    mov r12, r0
1750
1751    // Load parameters from frame into registers.
1752    pop {r0-r3}
1753
1754    // Softfloat.
1755    // TODO: Change to hardfloat when supported.
1756
1757    blx r12           // native call.
1758
1759    // Result sign extension is handled in C code.
1760    // Prepare for the artQuickGenericJniEndTrampoline call:
1761    // (Thread*, result, result_f)
1762    //    r0      r2,r3    stack       <= C calling convention
1763    //    r11     r0,r1    r0,r1       <= where they are
1764    sub sp, sp, #8 // Stack alignment.
1765
1766    push {r0-r1}
1767    mov r3, r1
1768    mov r2, r0
1769    mov r0, r11
1770
1771    blx artQuickGenericJniEndTrampoline
1772
1773    // Restore self pointer.
1774    mov r9, r11
1775
1776    // Pending exceptions possible.
1777    ldr r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
1778    cbnz r2, .Lexception_in_native
1779
1780    // Tear down the alloca.
1781    mov sp, r10
1782    .cfi_def_cfa_register sp
1783
1784    // Tear down the callee-save frame. Skip arg registers.
1785    add     sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
1786    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY)
1787    RESTORE_SAVE_REFS_ONLY_FRAME
1788    REFRESH_MARKING_REGISTER
1789
1790    // Store into fpr, for when it's an fpr return.
1791    vmov d0, r0, r1
1792    bx lr      // ret
1793    // Undo the unwinding information from above since it doesn't apply below.
1794    .cfi_def_cfa_register r10
1795    .cfi_adjust_cfa_offset FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
1796
1797.Lexception_in_native:
1798    ldr ip, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]
1799    add ip, ip, #-1  // Remove the GenericJNI tag. ADD/SUB writing directly to SP is UNPREDICTABLE.
1800    mov sp, ip
1801    .cfi_def_cfa_register sp
1802    @ This will create a new save-all frame, required by the runtime.
1803    DELIVER_PENDING_EXCEPTION
1804END art_quick_generic_jni_trampoline
1805
1806    .extern artQuickToInterpreterBridge
1807ENTRY art_quick_to_interpreter_bridge
1808    SETUP_SAVE_REFS_AND_ARGS_FRAME r1
1809    mov     r1, r9                 @ pass Thread::Current
1810    mov     r2, sp                 @ pass SP
1811    blx     artQuickToInterpreterBridge    @ (Method* method, Thread*, SP)
1812    ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
1813    // Tear down the callee-save frame. Skip arg registers.
1814    add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1815    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
1816    RESTORE_SAVE_REFS_ONLY_FRAME
1817    REFRESH_MARKING_REGISTER
1818    cbnz    r2, 1f                 @ deliver the exception if one is pending
1819    vmov    d0, r0, r1             @ store into fpr, for when it's an fpr return
1820    bx      lr                     @ return on success
18211:
1822    DELIVER_PENDING_EXCEPTION
1823END art_quick_to_interpreter_bridge
1824
1825/*
1826 * Called to attempt to execute an obsolete method.
1827 */
1828ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
1829
1830    /*
1831     * Routine that intercepts method calls and returns.
1832     */
1833    .extern artInstrumentationMethodEntryFromCode
1834    .extern artInstrumentationMethodExitFromCode
1835ENTRY art_quick_instrumentation_entry
1836    @ Make stack crawlable and clobber r2 and r3 (post saving)
1837    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
1838    @ preserve r0 (not normally an arg) knowing there is a spare slot in kSaveRefsAndArgs.
1839    str   r0, [sp, #4]
1840    mov   r2, r9         @ pass Thread::Current
1841    mov   r3, sp         @ pass SP
1842    blx   artInstrumentationMethodEntryFromCode  @ (Method*, Object*, Thread*, SP)
1843    cbz   r0, .Ldeliver_instrumentation_entry_exception
1844                         @ Deliver exception if we got nullptr as function.
1845    mov   r12, r0        @ r12 holds reference to code
1846    ldr   r0, [sp, #4]   @ restore r0
1847    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1848    adr   lr, art_quick_instrumentation_exit + /* thumb mode */ 1
1849                         @ load art_quick_instrumentation_exit into lr in thumb mode
1850    REFRESH_MARKING_REGISTER
1851    bx    r12            @ call method with lr set to art_quick_instrumentation_exit
1852.Ldeliver_instrumentation_entry_exception:
1853    @ Deliver exception for art_quick_instrumentation_entry placed after
1854    @ art_quick_instrumentation_exit so that the fallthrough works.
1855    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1856    DELIVER_PENDING_EXCEPTION
1857END art_quick_instrumentation_entry
1858
1859ENTRY art_quick_instrumentation_exit
1860    mov   lr, #0         @ the link register points to here, so clobber it with 0 for later checks
1861    SETUP_SAVE_EVERYTHING_FRAME r2
1862
1863    add   r3, sp, #8     @ pass fpr_res pointer, into the kSaveEverything frame
1864    add   r2, sp, #136   @ pass gpr_res pointer, into the kSaveEverything frame
1865    mov   r1, sp         @ pass SP
1866    mov   r0, r9         @ pass Thread::Current
1867    blx   artInstrumentationMethodExitFromCode  @ (Thread*, SP, gpr_res*, fpr_res*)
1868
1869    cbz   r0, .Ldo_deliver_instrumentation_exception
1870                         @ Deliver exception if we got nullptr as function.
1871    cbnz  r1, .Ldeoptimize
1872    // Normal return.
1873    str   r0, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 4]
1874                         @ Set return pc.
1875    RESTORE_SAVE_EVERYTHING_FRAME
1876    REFRESH_MARKING_REGISTER
1877    bx lr
1878.Ldo_deliver_instrumentation_exception:
1879    DELIVER_PENDING_EXCEPTION_FRAME_READY
1880.Ldeoptimize:
1881    str   r1, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 4]
1882                         @ Set return pc.
1883    RESTORE_SAVE_EVERYTHING_FRAME
1884    // Jump to art_quick_deoptimize.
1885    b     art_quick_deoptimize
1886END art_quick_instrumentation_exit
1887
1888    /*
1889     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
1890     * will long jump to the upcall with a special exception of -1.
1891     */
1892    .extern artDeoptimize
1893ENTRY art_quick_deoptimize
1894    SETUP_SAVE_EVERYTHING_FRAME r0
1895    mov    r0, r9         @ pass Thread::Current
1896    blx    artDeoptimize  @ (Thread*)
1897END art_quick_deoptimize
1898
1899    /*
1900     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
1901     * will long jump to the interpreter bridge.
1902     */
1903    .extern artDeoptimizeFromCompiledCode
1904ENTRY art_quick_deoptimize_from_compiled_code
1905    SETUP_SAVE_EVERYTHING_FRAME r1
1906    mov    r1, r9                         @ pass Thread::Current
1907    blx    artDeoptimizeFromCompiledCode  @ (DeoptimizationKind, Thread*)
1908END art_quick_deoptimize_from_compiled_code
1909
1910    /*
1911     * Signed 64-bit integer multiply.
1912     *
1913     * Consider WX x YZ (r1r0 x r3r2) with a long multiply:
1914     *        WX
1915     *      x YZ
1916     *  --------
1917     *     ZW ZX
1918     *  YW YX
1919     *
1920     * The low word of the result holds ZX, the high word holds
1921     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
1922     * it doesn't fit in the low 64 bits.
1923     *
1924     * Unlike most ARM math operations, multiply instructions have
1925     * restrictions on using the same register more than once (Rd and Rm
1926     * cannot be the same).
1927     */
1928    /* mul-long vAA, vBB, vCC */
1929ENTRY art_quick_mul_long
1930    push    {r9-r10}
1931    .cfi_adjust_cfa_offset 8
1932    .cfi_rel_offset r9, 0
1933    .cfi_rel_offset r10, 4
1934    mul     ip, r2, r1                  @  ip<- ZxW
1935    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
1936    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
1937    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
1938    mov     r0, r9
1939    mov     r1, r10
1940    pop     {r9-r10}
1941    .cfi_adjust_cfa_offset -8
1942    .cfi_restore r9
1943    .cfi_restore r10
1944    bx      lr
1945END art_quick_mul_long
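// Illustrative C++ equivalent of the decomposition above:
//   uint64_t MulLong(uint64_t a, uint64_t b) {  // a = W:X, b = Y:Z.
//     uint32_t X = static_cast<uint32_t>(a), W = static_cast<uint32_t>(a >> 32);
//     uint32_t Z = static_cast<uint32_t>(b), Y = static_cast<uint32_t>(b >> 32);
//     uint64_t ZX = static_cast<uint64_t>(Z) * X;
//     uint32_t hi = static_cast<uint32_t>(ZX >> 32) + Z * W + Y * X;  // Mod 2^32.
//     return (static_cast<uint64_t>(hi) << 32) | static_cast<uint32_t>(ZX);
//   }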
1946
1947    /*
1948     * Long integer shift.  This is different from the generic 32/64-bit
1949     * binary operations because vAA/vBB are 64-bit but vCC (the shift
1950     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
1951     * 6 bits.
1952     * On entry:
1953     *   r0: low word
1954     *   r1: high word
1955     *   r2: shift count
1956     */
1957    /* shl-long vAA, vBB, vCC */
1958ARM_ENTRY art_quick_shl_long            @ ARM code as thumb code requires spills
1959    and     r2, r2, #63                 @ r2<- r2 & 0x3f
1960    mov     r1, r1, asl r2              @  r1<- r1 << r2
1961    rsb     r3, r2, #32                 @  r3<- 32 - r2
1962    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
1963    subs    ip, r2, #32                 @  ip<- r2 - 32
1964    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
1965    mov     r0, r0, asl r2              @  r0<- r0 << r2
1966    bx      lr
1967END art_quick_shl_long
1968
1969    /*
1970     * Long integer shift.  This is different from the generic 32/64-bit
1971     * binary operations because vAA/vBB are 64-bit but vCC (the shift
1972     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
1973     * 6 bits.
1974     * On entry:
1975     *   r0: low word
1976     *   r1: high word
1977     *   r2: shift count
1978     */
1979    /* shr-long vAA, vBB, vCC */
1980ARM_ENTRY art_quick_shr_long            @ ARM code as thumb code requires spills
1981    and     r2, r2, #63                 @ r2<- r2 & 0x3f
1982    mov     r0, r0, lsr r2              @  r0<- r0 >> r2
1983    rsb     r3, r2, #32                 @  r3<- 32 - r2
1984    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
1985    subs    ip, r2, #32                 @  ip<- r2 - 32
1986    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
1987    mov     r1, r1, asr r2              @  r1<- r1 >> r2
1988    bx      lr
1989END art_quick_shr_long
1990
1991    /*
1992     * Long integer shift.  This is different from the generic 32/64-bit
1993     * binary operations because vAA/vBB are 64-bit but vCC (the shift
1994     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
1995     * 6 bits.
1996     * On entry:
1997     *   r0: low word
1998     *   r1: high word
1999     *   r2: shift count
2000     */
2001    /* ushr-long vAA, vBB, vCC */
2002ARM_ENTRY art_quick_ushr_long           @ ARM code as thumb code requires spills
2003    and     r2, r2, #63                 @ r2<- r2 & 0x3f
2004    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
2005    rsb     r3, r2, #32                 @  r3<- 32 - r2
2006    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
2007    subs    ip, r2, #32                 @  ip<- r2 - 32
2008    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
2009    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
2010    bx      lr
2011END art_quick_ushr_long
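// Illustrative pseudo-C++ for the 64-bit shift decomposition used by the three
// routines above, shown for shl-long (n is already masked to 0..63). It relies
// on the ARM semantics of register-specified shifts, where shifting by 32 or
// more yields 0 (in ISO C++ that would be undefined behavior):
//   hi = (hi << n) | (lo >> (32 - n));  // Low bits spill into the high word.
//   if (n >= 32) hi = lo << (n - 32);   // The shift crosses the word boundary.
//   lo = lo << n;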
2012
2013    /*
2014     * String's indexOf.
2015     *
2016     * On entry:
2017     *    r0:   string object (known non-null)
2018     *    r1:   char to match (known <= 0xFFFF)
2019     *    r2:   Starting offset in string data
2020     */
2021ENTRY art_quick_indexof
2022    push {r4, r10-r11, lr} @ 4 words of callee saves
2023    .cfi_adjust_cfa_offset 16
2024    .cfi_rel_offset r4, 0
2025    .cfi_rel_offset r10, 4
2026    .cfi_rel_offset r11, 8
2027    .cfi_rel_offset lr, 12
2028#if (STRING_COMPRESSION_FEATURE)
2029    ldr   r4, [r0, #MIRROR_STRING_COUNT_OFFSET]
2030#else
2031    ldr   r3, [r0, #MIRROR_STRING_COUNT_OFFSET]
2032#endif
2033    add   r0, #MIRROR_STRING_VALUE_OFFSET
2034#if (STRING_COMPRESSION_FEATURE)
2035    /* r4 holds the count (with the compression flag); r3 holds the actual length */
2036    lsr   r3, r4, #1
2037#endif
2038    /* Clamp start to [0..count] */
2039    cmp   r2, #0
2040    it    lt
2041    movlt r2, #0
2042    cmp   r2, r3
2043    it    gt
2044    movgt r2, r3
2045
2046    /* Save a copy in r12 to later compute result */
2047    mov   r12, r0
2048
2049    /* Build pointer to start of data to compare and pre-bias */
2050#if (STRING_COMPRESSION_FEATURE)
2051    lsrs  r4, r4, #1
2052    bcc   .Lstring_indexof_compressed
2053#endif
2054    add   r0, r0, r2, lsl #1
2055    sub   r0, #2
2056
2057    /* Compute iteration count */
2058    sub   r2, r3, r2
2059
2060    /*
2061     * At this point we have:
2062     *   r0: start of data to test
2063     *   r1: char to compare
2064     *   r2: iteration count
2065     *   r4: compression style (used temporarily)
2066     *   r12: original start of string data
2067     *   r3, r4, r10, r11 available for loading string data
2068     */
2069
2070    subs  r2, #4
2071    blt   .Lindexof_remainder
2072
2073.Lindexof_loop4:
2074    ldrh  r3, [r0, #2]!
2075    ldrh  r4, [r0, #2]!
2076    ldrh  r10, [r0, #2]!
2077    ldrh  r11, [r0, #2]!
2078    cmp   r3, r1
2079    beq   .Lmatch_0
2080    cmp   r4, r1
2081    beq   .Lmatch_1
2082    cmp   r10, r1
2083    beq   .Lmatch_2
2084    cmp   r11, r1
2085    beq   .Lmatch_3
2086    subs  r2, #4
2087    bge   .Lindexof_loop4
2088
2089.Lindexof_remainder:
2090    adds  r2, #4
2091    beq   .Lindexof_nomatch
2092
2093.Lindexof_loop1:
2094    ldrh  r3, [r0, #2]!
2095    cmp   r3, r1
2096    beq   .Lmatch_3
2097    subs  r2, #1
2098    bne   .Lindexof_loop1
2099
2100.Lindexof_nomatch:
2101    mov   r0, #-1
2102    pop {r4, r10-r11, pc}
2103
2104.Lmatch_0:
2105    sub   r0, #6
2106    sub   r0, r12
2107    asr   r0, r0, #1
2108    pop {r4, r10-r11, pc}
2109.Lmatch_1:
2110    sub   r0, #4
2111    sub   r0, r12
2112    asr   r0, r0, #1
2113    pop {r4, r10-r11, pc}
2114.Lmatch_2:
2115    sub   r0, #2
2116    sub   r0, r12
2117    asr   r0, r0, #1
2118    pop {r4, r10-r11, pc}
2119.Lmatch_3:
2120    sub   r0, r12
2121    asr   r0, r0, #1
2122    pop {r4, r10-r11, pc}
2123#if (STRING_COMPRESSION_FEATURE)
2124.Lstring_indexof_compressed:
2125    add   r0, r0, r2
2126    sub   r0, #1
2127    sub   r2, r3, r2
2128.Lstring_indexof_compressed_loop:
2129    subs  r2, #1
2130    blt   .Lindexof_nomatch
2131    ldrb  r3, [r0, #1]!
2132    cmp   r3, r1
2133    beq   .Lstring_indexof_compressed_matched
2134    b     .Lstring_indexof_compressed_loop
2135.Lstring_indexof_compressed_matched:
2136    sub   r0, r12
2137    pop {r4, r10-r11, pc}
2138#endif
2139END art_quick_indexof
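// Illustrative C++ equivalent of the routine above, without the 4x unrolling
// of the main loop (for compressed strings the characters are bytes instead):
//   int32_t IndexOf(const uint16_t* chars, int32_t count, uint16_t ch,
//                   int32_t start) {
//     if (start < 0) start = 0;
//     if (start > count) start = count;
//     for (int32_t i = start; i < count; ++i) {
//       if (chars[i] == ch) return i;
//     }
//     return -1;
//   }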
2140
2141    /* Assembly routines used to handle ABI differences. */
2142
2143    /* double fmod(double a, double b) */
2144    .extern fmod
2145ENTRY art_quick_fmod
2146    push  {lr}
2147    .cfi_adjust_cfa_offset 4
2148    .cfi_rel_offset lr, 0
2149    sub   sp, #4
2150    .cfi_adjust_cfa_offset 4
2151    vmov  r0, r1, d0
2152    vmov  r2, r3, d1
2153    bl    fmod
2154    vmov  d0, r0, r1
2155    add   sp, #4
2156    .cfi_adjust_cfa_offset -4
2157    pop   {pc}
2158END art_quick_fmod
2159
2160    /* float fmodf(float a, float b) */
2161     .extern fmodf
2162ENTRY art_quick_fmodf
2163    push  {lr}
2164    .cfi_adjust_cfa_offset 4
2165    .cfi_rel_offset lr, 0
2166    sub   sp, #4
2167    .cfi_adjust_cfa_offset 4
2168    vmov  r0, r1, d0
2169    bl    fmodf
2170    vmov  s0, r0
2171    add   sp, #4
2172    .cfi_adjust_cfa_offset -4
2173    pop   {pc}
2174END art_quick_fmodf
2175
2176    /* int64_t art_d2l(double d) */
2177    .extern art_d2l
2178ENTRY art_quick_d2l
2179    vmov  r0, r1, d0
2180    b     art_d2l
2181END art_quick_d2l
2182
2183    /* int64_t art_f2l(float f) */
2184    .extern art_f2l
2185ENTRY art_quick_f2l
2186    vmov  r0, s0
2187    b     art_f2l
2188END art_quick_f2l
2189
2190    /* float art_l2f(int64_t l) */
2191    .extern art_l2f
2192ENTRY art_quick_l2f
2193    push  {lr}
2194    .cfi_adjust_cfa_offset 4
2195    .cfi_rel_offset lr, 0
2196    sub   sp, #4
2197    .cfi_adjust_cfa_offset 4
2198    bl    art_l2f
2199    vmov  s0, r0
2200    add   sp, #4
2201    .cfi_adjust_cfa_offset -4
2202    pop   {pc}
2203END art_quick_l2f
2204
2205.macro CONDITIONAL_CBZ reg, reg_if, dest
2206.ifc \reg, \reg_if
2207    cbz \reg, \dest
2208.endif
2209.endm
2210
2211.macro CONDITIONAL_CMPBZ reg, reg_if, dest
2212.ifc \reg, \reg_if
2213    cmp \reg, #0
2214    beq \dest
2215.endif
2216.endm
2217
2218// Use CBZ if the register is in {r0-r7}; otherwise compare and branch.
2219.macro SMART_CBZ reg, dest
2220    CONDITIONAL_CBZ \reg, r0, \dest
2221    CONDITIONAL_CBZ \reg, r1, \dest
2222    CONDITIONAL_CBZ \reg, r2, \dest
2223    CONDITIONAL_CBZ \reg, r3, \dest
2224    CONDITIONAL_CBZ \reg, r4, \dest
2225    CONDITIONAL_CBZ \reg, r5, \dest
2226    CONDITIONAL_CBZ \reg, r6, \dest
2227    CONDITIONAL_CBZ \reg, r7, \dest
2228    CONDITIONAL_CMPBZ \reg, r8, \dest
2229    CONDITIONAL_CMPBZ \reg, r9, \dest
2230    CONDITIONAL_CMPBZ \reg, r10, \dest
2231    CONDITIONAL_CMPBZ \reg, r11, \dest
2232    CONDITIONAL_CMPBZ \reg, r12, \dest
2233    CONDITIONAL_CMPBZ \reg, r13, \dest
2234    CONDITIONAL_CMPBZ \reg, r14, \dest
2235    CONDITIONAL_CMPBZ \reg, r15, \dest
2236.endm
2237
2238    /*
2239     * Create a function `name` calling the ReadBarrier::Mark routine,
2240     * getting its argument and returning its result through register
2241     * `reg`, saving and restoring all caller-save registers.
2242     *
2243     * IP is clobbered; `reg` must not be IP.
2244     *
2245     * If `reg` is different from `r0`, the generated function follows a
2246     * non-standard runtime calling convention:
2247     * - register `reg` is used to pass the (sole) argument of this
2248     *   function (instead of R0);
2249     * - register `reg` is used to return the result of this function
2250     *   (instead of R0);
2251     * - R0 is treated like a normal (non-argument) caller-save register;
2252     * - everything else is the same as in the standard runtime calling
2253     *   convention (e.g. standard callee-save registers are preserved).
2254     */
2255.macro READ_BARRIER_MARK_REG name, reg
2256ENTRY \name
2257    // Null check so that we can load the lock word.
2258    SMART_CBZ \reg, .Lret_rb_\name
2259    // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked.
2260    ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
2261    tst ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
2262    beq .Lnot_marked_rb_\name
2263    // Already marked, return right away.
2264.Lret_rb_\name:
2265    bx lr
2266
2267.Lnot_marked_rb_\name:
2268    // Test that both of the forwarding address state bits are 1.
2269#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
2270    // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
2271    // the highest bits and the "forwarding address" state to have all bits set.
2272#error "Unexpected lock word state shift or forwarding address state value."
2273#endif
2274    cmp ip, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
2275    bhs .Lret_forwarding_address\name
2276
2277.Lslow_rb_\name:
2278    // Save IP: The kSaveEverything entrypoint art_quick_resolve_string used to
2279    // make a tail call here. Currently, it serves only for stack alignment but
2280    // we may reintroduce kSaveEverything calls here in the future.
2281    push  {r0-r4, r9, ip, lr}           @ save return address, core caller-save registers and ip
2282    .cfi_adjust_cfa_offset 32
2283    .cfi_rel_offset r0, 0
2284    .cfi_rel_offset r1, 4
2285    .cfi_rel_offset r2, 8
2286    .cfi_rel_offset r3, 12
2287    .cfi_rel_offset r4, 16
2288    .cfi_rel_offset r9, 20
2289    .cfi_rel_offset ip, 24
2290    .cfi_rel_offset lr, 28
2291
2292    .ifnc \reg, r0
2293      mov   r0, \reg                    @ pass arg1 - obj from `reg`
2294    .endif
2295
2296    vpush {s0-s15}                      @ save floating-point caller-save registers
2297    .cfi_adjust_cfa_offset 64
2298    bl    artReadBarrierMark            @ r0 <- artReadBarrierMark(obj)
2299    vpop {s0-s15}                       @ restore floating-point registers
2300    .cfi_adjust_cfa_offset -64
2301
2302    .ifc \reg, r0                       @ Save result to the stack slot or destination register.
2303      str r0, [sp, #0]
2304    .else
2305      .ifc \reg, r1
2306        str r0, [sp, #4]
2307      .else
2308        .ifc \reg, r2
2309          str r0, [sp, #8]
2310        .else
2311          .ifc \reg, r3
2312            str r0, [sp, #12]
2313          .else
2314            .ifc \reg, r4
2315              str r0, [sp, #16]
2316            .else
2317              .ifc \reg, r9
2318                str r0, [sp, #20]
2319              .else
2320                mov \reg, r0
2321              .endif
2322            .endif
2323          .endif
2324        .endif
2325      .endif
2326    .endif
2327
2328    pop   {r0-r4, r9, ip, lr}           @ restore caller-save registers
2329    .cfi_adjust_cfa_offset -32
2330    .cfi_restore r0
2331    .cfi_restore r1
2332    .cfi_restore r2
2333    .cfi_restore r3
2334    .cfi_restore r4
2335    .cfi_restore r9
2336    .cfi_restore ip
2337    .cfi_restore lr
2338    bx lr
2339.Lret_forwarding_address\name:
2340    // Shift left by the forwarding address shift. This clears out the state bits since they are
2341    // in the top 2 bits of the lock word.
2342    lsl \reg, ip, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
2343    bx lr
2344END \name
2345.endm
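// Illustrative arithmetic for the forwarding-address case above (assuming
// LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT == 3, matching 8-byte object
// alignment): the lock word stores (addr >> 3) with the 0b11 state bits on
// top, so 'lsl #3' both reconstructs the address, whose low 3 bits are zero
// by alignment, and shifts the state bits out of the register.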
2346
2347READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0
2348READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1
2349READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2
2350READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3
2351READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, r4
2352READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, r5
2353READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, r6
2354READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, r7
2355READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
2356READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
2357READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
2358READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
2359
2360// Helper macros for Baker CC read barrier mark introspection (BRBMI).
2361.macro BRBMI_FOR_12_REGISTERS macro_for_register, macro_for_reserved_register
2362    \macro_for_register r0
2363    \macro_for_register r1
2364    \macro_for_register r2
2365    \macro_for_register r3
2366    \macro_for_reserved_register  // R4 is reserved for the entrypoint address.
2367    \macro_for_register r5
2368    \macro_for_register r6
2369    \macro_for_register r7
2370    \macro_for_register r8
2371    \macro_for_register r9
2372    \macro_for_register r10
2373    \macro_for_register r11
2374.endm
2375
2376.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register
2377    BRBMI_FOR_12_REGISTERS \macro_for_register, \macro_for_reserved_register
2378    \macro_for_reserved_register  // IP is reserved.
2379    \macro_for_reserved_register  // SP is reserved.
2380    \macro_for_reserved_register  // LR is reserved.
2381    \macro_for_reserved_register  // PC is reserved.
2382.endm
2383
2384.macro BRBMI_RETURN_SWITCH_CASE reg
2385.Lmark_introspection_return_switch_case_\reg:
2386    mov     \reg, ip
2387    bx      lr
2388.endm
2389
2390.macro BRBMI_BAD_RETURN_SWITCH_CASE
2391.Lmark_introspection_return_switch_case_bad:
2392    BRBMI_BKPT_FILL_4B
2393.endm
2394
2395.macro BRBMI_RETURN_SWITCH_CASE_OFFSET reg
2396    .byte   (.Lmark_introspection_return_switch_case_\reg - .Lmark_introspection_return_table) / 2
2397.endm
2398
2399.macro BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
2400    .byte   (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2
2401.endm
2402
2403#if BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
2404#error "Array and field introspection code sharing requires same LDR offset."
2405#endif
2406.macro BRBMI_ARRAY_LOAD index_reg
2407    ldr     ip, [ip, \index_reg, lsl #2]                // 4 bytes.
2408    b       art_quick_read_barrier_mark_introspection   // Should be 2 bytes, encoding T2.
2409    .balign 8                                           // Add padding to 8 bytes.
2410.endm
2411
2412.macro BRBMI_BKPT_FILL_4B
2413    bkpt    0
2414    bkpt    0
2415.endm
2416
2417.macro BRBMI_BKPT_FILL_8B
2418    BRBMI_BKPT_FILL_4B
2419    BRBMI_BKPT_FILL_4B
2420.endm
2421
2422.macro BRBMI_RUNTIME_CALL
2423    // Note: This macro generates exactly 22 bytes of code. The core register
2424    // PUSH and the MOVs are 16-bit instructions; the rest are 32-bit instructions.
2425
2426    push   {r0-r3, r7, lr}            // Save return address and caller-save registers.
2427    .cfi_adjust_cfa_offset 24
2428    .cfi_rel_offset r0, 0
2429    .cfi_rel_offset r1, 4
2430    .cfi_rel_offset r2, 8
2431    .cfi_rel_offset r3, 12
2432    .cfi_rel_offset r7, 16
2433    .cfi_rel_offset lr, 20
2434
2435    mov     r0, ip                    // Pass the reference.
2436    vpush {s0-s15}                    // save floating-point caller-save registers
2437    .cfi_adjust_cfa_offset 64
2438    bl      artReadBarrierMark        // r0 <- artReadBarrierMark(obj)
2439    vpop    {s0-s15}                  // restore floating-point registers
2440    .cfi_adjust_cfa_offset -64
2441    mov     ip, r0                    // Move reference to ip in preparation for return switch.
2442
2443    pop     {r0-r3, r7, lr}           // Restore registers.
2444    .cfi_adjust_cfa_offset -24
2445    .cfi_restore r0
2446    .cfi_restore r1
2447    .cfi_restore r2
2448    .cfi_restore r3
2449    .cfi_restore r7
2450    .cfi_restore lr
2451.endm
2452
2453.macro BRBMI_CHECK_NULL_AND_MARKED label_suffix
2454    // If reference is null, just return it in the right register.
2455    cmp     ip, #0
2456    beq     .Lmark_introspection_return\label_suffix
2457    // Use R4 as temp and check the mark bit of the reference.
2458    ldr     r4, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
2459    tst     r4, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
2460    beq     .Lmark_introspection_unmarked\label_suffix
2461.Lmark_introspection_return\label_suffix:
2462.endm
2463
2464.macro BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK label_suffix
2465.Lmark_introspection_unmarked\label_suffix:
2466    // Check if the top two bits are one, if this is the case it is a forwarding address.
2467#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
2468    // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
2469    // the highest bits and the "forwarding address" state to have all bits set.
2470#error "Unexpected lock word state shift or forwarding address state value."
2471#endif
2472    cmp     r4, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
2473    bhs     .Lmark_introspection_forwarding_address\label_suffix
2474.endm
2475
2476.macro BRBMI_EXTRACT_FORWARDING_ADDRESS label_suffix
2477.Lmark_introspection_forwarding_address\label_suffix:
2478    // Note: This macro generates exactly 6 bytes of code; the branch is near.
2479
2480    // Shift left by the forwarding address shift. This clears out the state bits since they are
2481    // in the top 2 bits of the lock word.
2482    lsl     ip, r4, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
2483    b       .Lmark_introspection_return\label_suffix
2484.endm
2485
2486.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_wide ldr_offset
2487    // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR.
2488    ldrh    r4, [lr, #(-1 + \ldr_offset + 2)]
2489.endm
2490
2491.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow ldr_offset
2492    // Load the 16-bit instruction. Adjust for the thumb state in LR.
2493    ldrh    r4, [lr, #(-1 + \ldr_offset)]
2494.endm

.macro BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH gc_root_ldr_offset, label_suffix
    .balign 64
    .thumb_func
    .type art_quick_read_barrier_mark_introspection_gc_roots\label_suffix, #function
    .hidden art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
    .global art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
art_quick_read_barrier_mark_introspection_gc_roots\label_suffix:
    BRBMI_RUNTIME_CALL
    // Load the LDR (or the half of it) that contains Rt.
    BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \gc_root_ldr_offset
    b       .Lmark_introspection_extract_register_and_return\label_suffix
    // We've used 28 bytes since the "gc_roots" entrypoint (22 bytes for
    // BRBMI_RUNTIME_CALL, 4 bytes for LDRH and 2 bytes for the branch). Squeeze
    // the 6 byte forwarding address extraction here across the 32-byte boundary.
    BRBMI_EXTRACT_FORWARDING_ADDRESS \label_suffix
    // And the slow path taking exactly 30 bytes (6 bytes for the forwarding
    // address check, 22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near
    // branch) shall take the rest of the 32-byte section (within a cache line).
    BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK \label_suffix
    BRBMI_RUNTIME_CALL
    b       .Lmark_introspection_return\label_suffix
.endm
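
// Sanity check of the byte budget stated above: 28 (gc_roots entrypoint)
// + 6 (forwarding address extraction) + 30 (slow path) = 64 bytes, i.e. the
// macro fills exactly the two 32-byte sections implied by its ".balign 64".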

    /*
     * Use introspection to load a reference from the same address as the LDR
     * instruction in generated code would load (unless loaded by the thunk,
     * see below), call ReadBarrier::Mark() with that reference if needed
     * and return it in the same register as the LDR instruction would load.
     *
     * The entrypoint is called through a thunk that differs across load kinds.
     * For field and array loads the LDR instruction in generated code follows
     * the branch to the thunk, i.e. the LDR is (ignoring the heap poisoning)
     * at [LR, #(-4 - 1)] (encoding T3) or [LR, #(-2 - 1)] (encoding T1) where
     * the -1 is an adjustment for the Thumb mode bit in LR, and the thunk
     * knows the holder and performs the gray bit check, returning to the LDR
     * instruction if the object is not gray, so this entrypoint no longer
     * needs to know anything about the holder. For GC root loads, the LDR
     * instruction in generated code precedes the branch to the thunk, i.e. the
     * LDR is at [LR, #(-8 - 1)] (encoding T3) or [LR, #(-6 - 1)] (encoding T1)
     * where the -1 is again the Thumb mode bit adjustment, and the thunk does
     * not do the gray bit check.
     *
     * For field accesses and array loads with a constant index the thunk loads
     * the reference into IP using introspection and calls the main entrypoint,
     * art_quick_read_barrier_mark_introspection. With heap poisoning enabled,
     * the passed reference is poisoned.
     *
     * For array accesses with non-constant index, the thunk inserts the bits
     * 0-5 of the LDR instruction into the entrypoint address, effectively
     * calculating a switch case label based on the index register (bits 0-3)
     * and adding an extra offset (bits 4-5 hold the shift which is always 2
     * for reference loads) to differentiate from the main entrypoint, then
     * moves the base register to IP and jumps to the switch case. Therefore
     * we need to align the main entrypoint to 512 bytes, accounting for
     * a 256-byte offset followed by 16 array entrypoints starting at
     * art_quick_read_barrier_mark_introspection_arrays, each containing an LDR
     * (register) and a branch to the main entrypoint.
     *
     * For GC root accesses we cannot use the main entrypoint because of the
     * different offset where the LDR instruction in generated code is located.
     * (And even with heap poisoning enabled, GC roots are not poisoned.)
     * To re-use the same entrypoint pointer in generated code, we make sure
     * that the gc root entrypoint (a copy of the entrypoint with a different
     * offset for introspection loads) is located at a known offset (128 bytes,
     * or BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET) from the main
     * entrypoint and the GC root thunk adjusts the entrypoint pointer, moves
     * the root register to IP and jumps to the customized entrypoint,
     * art_quick_read_barrier_mark_introspection_gc_roots. The thunk also
     * performs all the fast-path checks, so we need just the slow path.
     *
     * The code structure is
     *   art_quick_read_barrier_mark_introspection:
     *     Up to 32 bytes of main entrypoint fast-path code for fields
     *     (and array elements with constant offset) with LDR encoding T3;
     *     jumps to the switch in the "narrow" entrypoint.
     *     Padding to 32 bytes if needed.
     *   art_quick_read_barrier_mark_introspection_narrow:
     *     Up to 48 bytes of fast-path code for fields (and array
     *     elements with constant offset) with LDR encoding T1, ending in the
     *     return switch instruction TBB and the table with switch offsets.
     *     Padding to 80 bytes if needed.
     *   .Lmark_introspection_return_switch_case_r0:
     *     Exactly 48 bytes of code for the return switch cases (12 cases,
     *     including BKPT for the reserved registers).
     *     Ends at 128 bytes total.
     *   art_quick_read_barrier_mark_introspection_gc_roots_wide:
     *     GC root entrypoint code for LDR encoding T3 (28 bytes).
     *     Forwarding address extraction for LDR encoding T3 (6 bytes).
     *     Slow path for main entrypoint for LDR encoding T3 (30 bytes).
     *     Ends at 192 bytes total.
     *   art_quick_read_barrier_mark_introspection_gc_roots_narrow:
     *     GC root entrypoint code for LDR encoding T1 (28 bytes).
     *     Forwarding address extraction for LDR encoding T1 (6 bytes).
     *     Slow path for main entrypoint for LDR encoding T1 (30 bytes).
     *     Ends at 256 bytes total.
     *   art_quick_read_barrier_mark_introspection_arrays:
     *     Exactly 128 bytes for array load switch cases (16x2 instructions).
     */
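
// A worked illustration of the array thunk dispatch described above (hedged;
// the bit position of the insertion is inferred, not stated): inserting bits
// 0-5 of the LDR (register) encoding, imm2:Rm with imm2 = 2 for reference
// loads, at bit 3 of the 512-byte aligned entrypoint address gives an offset
// of ((2 << 4) | Rm) << 3 = 256 + 8 * Rm. This matches the layout: 16 array
// entrypoints of 8 bytes each (an LDR (register) plus a branch), starting
// 256 bytes past the main entrypoint.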
    .balign 512
ENTRY art_quick_read_barrier_mark_introspection
    // At this point, IP contains the reference and R4 can be freely used.
    // (Compiled code reserves R4 for the entrypoint address, so it is free to clobber here.)
    // For heap poisoning, the reference is poisoned, so unpoison it first.
    UNPOISON_HEAP_REF ip
    // Check for null or marked, lock word is loaded into R4.
    BRBMI_CHECK_NULL_AND_MARKED _wide
    // Load the half of the instruction that contains Rt.
    BRBMI_LOAD_RETURN_REG_FROM_CODE_wide BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
.Lmark_introspection_extract_register_and_return_wide:
    lsr     r4, r4, #12               // Extract `ref_reg`.
    b       .Lmark_introspection_return_switch

    .balign 32
    .thumb_func
    .type art_quick_read_barrier_mark_introspection_narrow, #function
    .hidden art_quick_read_barrier_mark_introspection_narrow
    .global art_quick_read_barrier_mark_introspection_narrow
art_quick_read_barrier_mark_introspection_narrow:
    // At this point, IP contains the reference and R4 can be freely used.
    // (Compiled code reserves R4 for the entrypoint address, so it is free to clobber here.)
    // For heap poisoning, the reference is poisoned, so unpoison it first.
    UNPOISON_HEAP_REF ip
    // Check for null or marked, lock word is loaded into R4.
    BRBMI_CHECK_NULL_AND_MARKED _narrow
    // Load the 16-bit instruction.
    BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
.Lmark_introspection_extract_register_and_return_narrow:
    and     r4, r4, #7                // Extract `ref_reg`.
.Lmark_introspection_return_switch:
    tbb     [pc, r4]                  // Jump to the switch case.
.Lmark_introspection_return_table:
    BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE_OFFSET, BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
    .balign 16
    BRBMI_FOR_12_REGISTERS BRBMI_RETURN_SWITCH_CASE, BRBMI_BAD_RETURN_SWITCH_CASE
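
// Reading aid (hedged; byte counts follow the layout comment above): TBB
// reads the byte at PC + R4, i.e. from .Lmark_introspection_return_table,
// and branches forward by twice that value. Each switch case is expected to
// be 4 bytes (a 16-bit MOV of IP into the target register and a 16-bit BX
// LR), so the 12 cases, with BKPT for the reserved registers, fill the
// 48 bytes stated in the layout comment.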

    BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide
    BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow

    .balign 256
    .thumb_func
    .type art_quick_read_barrier_mark_introspection_arrays, #function
    .hidden art_quick_read_barrier_mark_introspection_arrays
    .global art_quick_read_barrier_mark_introspection_arrays
art_quick_read_barrier_mark_introspection_arrays:
    BRBMI_FOR_REGISTERS BRBMI_ARRAY_LOAD, BRBMI_BKPT_FILL_8B
END art_quick_read_barrier_mark_introspection

.extern artInvokePolymorphic
ENTRY art_quick_invoke_polymorphic
    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
    mov     r2, r9                 @ pass Thread::Current
    mov     r3, sp                 @ pass SP
    mov     r0, #0                 @ initialize 64-bit JValue as zero.
    str     r0, [sp, #-4]!
    .cfi_adjust_cfa_offset 4
    str     r0, [sp, #-4]!
    .cfi_adjust_cfa_offset 4
    mov     r0, sp                 @ pass JValue for return result as first argument.
    bl      artInvokePolymorphic   @ artInvokePolymorphic(JValue, receiver, Thread*, SP)
    sub     r0, 'A'                @ return value is descriptor of handle's return type.
    cmp     r0, 'Z' - 'A'          @ check if value is in bounds of handler table
    bgt     .Lcleanup_and_return   @ and clean-up if not.
    adr     r1, .Lhandler_table
    tbb     [r0, r1]               @ branch to handler for return value based on return type.

.Lstart_of_handlers:
.Lstore_boolean_result:
    ldrb    r0, [sp]               @ Copy boolean value to return value of this function.
    b       .Lcleanup_and_return
.Lstore_char_result:
    ldrh    r0, [sp]               @ Copy char value to return value of this function.
    b       .Lcleanup_and_return
.Lstore_float_result:
    vldr    s0, [sp]               @ Copy float value from JValue result to the context restored by
    vstr    s0, [sp, #16]          @ RESTORE_SAVE_REFS_AND_ARGS_FRAME.
    b       .Lcleanup_and_return
.Lstore_double_result:
    vldr    d0, [sp]               @ Copy double value from JValue result to the context restored by
    vstr    d0, [sp, #16]          @ RESTORE_SAVE_REFS_AND_ARGS_FRAME.
    b       .Lcleanup_and_return
.Lstore_long_result:
    ldr     r1, [sp, #4]           @ Copy the upper bits from JValue result to the context restored by
    str     r1, [sp, #80]          @ RESTORE_SAVE_REFS_AND_ARGS_FRAME.
    // Fall-through for lower bits.
.Lstore_int_result:
    ldr     r0, [sp]               @ Copy int value to return value of this function.
    // Fall-through to clean up and return.
.Lcleanup_and_return:
    add     sp, #8
    .cfi_adjust_cfa_offset -8
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2

.macro HANDLER_TABLE_OFFSET handler_label
    .byte (\handler_label - .Lstart_of_handlers) / 2
.endm
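
// Reading aid: TBB branches forward by 2 * table_byte from the instruction
// boundary after the TBB, which is exactly .Lstart_of_handlers, and Thumb
// code is 2-byte aligned, hence the division by 2 above. For example,
// .Lstore_boolean_result sits right at .Lstart_of_handlers, so the
// 'Z' (boolean) row below encodes offset 0.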

.Lhandler_table:
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // A
    HANDLER_TABLE_OFFSET(.Lstore_int_result)      // B (byte)
    HANDLER_TABLE_OFFSET(.Lstore_char_result)     // C (char)
    HANDLER_TABLE_OFFSET(.Lstore_double_result)   // D (double)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // E
    HANDLER_TABLE_OFFSET(.Lstore_float_result)    // F (float)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // G
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // H
    HANDLER_TABLE_OFFSET(.Lstore_int_result)      // I (int)
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // J (long)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // K
    HANDLER_TABLE_OFFSET(.Lstore_int_result)      // L (object)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // M
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // N
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // O
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // P
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Q
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // R
    HANDLER_TABLE_OFFSET(.Lstore_int_result)      // S (short)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // T
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // U
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // V (void)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // W
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // X
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Y
    HANDLER_TABLE_OFFSET(.Lstore_boolean_result)  // Z (boolean)
.purgem HANDLER_TABLE_OFFSET
END art_quick_invoke_polymorphic

// Wrap ExecuteSwitchImpl in an assembly method which specifies the DEX PC for unwinding.
//  Argument 0: r0: The context pointer for ExecuteSwitchImpl.
//  Argument 1: r1: Pointer to the templated ExecuteSwitchImpl to call.
//  Argument 2: r2: The value of DEX PC (memory address of the method's bytecode).
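//
// A hedged sketch of the C-level contract (the authoritative declaration
// lives in the interpreter headers, not here); approximately:
//   extern "C" void ExecuteSwitchImplAsm(void* ctx, void* impl, const uint16_t* dex_pc);
// The wrapper publishes dex_pc in R4, describes it to the unwinder via
// CFI_DEFINE_DEX_PC_WITH_OFFSET, and then calls impl(ctx) with R0 untouched.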
ENTRY ExecuteSwitchImplAsm
    push {r4, lr}                                 // 2 words of callee saves.
    .cfi_adjust_cfa_offset 8
    .cfi_rel_offset r4, 0
    .cfi_rel_offset lr, 4
    mov r4, r2                                    // r4 = DEX PC
    CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* r0 */, 4 /* r4 */, 0)
    blx r1                                        // Call the wrapped method.
    pop {r4, pc}
END ExecuteSwitchImplAsm