• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "asm_support_arm64.S"
18#include "interpreter/cfi_asm_support.h"
19
20#include "arch/quick_alloc_entrypoints.S"
21
// Grows the frame by frame_adjustment bytes and saves reg at the new SP using a
// single pre-indexed store. The CFI directives record the CFA growth and that
// reg now lives at offset 0 from SP.
22.macro SAVE_REG_INCREASE_FRAME reg, frame_adjustment
23    str \reg, [sp, #-(\frame_adjustment)]!
24    .cfi_adjust_cfa_offset (\frame_adjustment)
25    .cfi_rel_offset \reg, 0
26.endm
27
// Inverse of SAVE_REG_INCREASE_FRAME: reloads reg from [sp] and then pops
// frame_adjustment bytes using a single post-indexed load. CFI marks reg as
// restored and shrinks the CFA offset by the same amount.
28.macro RESTORE_REG_DECREASE_FRAME reg, frame_adjustment
29    ldr \reg, [sp], #(\frame_adjustment)
30    .cfi_restore \reg
31    .cfi_adjust_cfa_offset -(\frame_adjustment)
32.endm
33
// Grows the frame by frame_adjustment bytes and saves a register pair at the
// new SP with one pre-indexed stp: reg1 at offset 0, reg2 at offset 8 (as
// recorded by the two .cfi_rel_offset directives).
34.macro SAVE_TWO_REGS_INCREASE_FRAME reg1, reg2, frame_adjustment
35    stp \reg1, \reg2, [sp, #-(\frame_adjustment)]!
36    .cfi_adjust_cfa_offset (\frame_adjustment)
37    .cfi_rel_offset \reg1, 0
38    .cfi_rel_offset \reg2, 8
39.endm
40
// Inverse of SAVE_TWO_REGS_INCREASE_FRAME: reloads the pair from [sp] and pops
// frame_adjustment bytes with one post-indexed ldp, then updates the CFI to
// mark both registers restored and the CFA offset reduced.
41.macro RESTORE_TWO_REGS_DECREASE_FRAME reg1, reg2, frame_adjustment
42    ldp \reg1, \reg2, [sp], #(\frame_adjustment)
43    .cfi_restore \reg1
44    .cfi_restore \reg2
45    .cfi_adjust_cfa_offset -(\frame_adjustment)
46.endm
47
// Drops a 96-byte frame via DECREASE_FRAME (defined outside this chunk); this
// macro itself reloads no registers.
// NOTE(review): 96 is presumably FRAME_SIZE_SAVE_REFS_ONLY - confirm and
// consider using the named constant.
48.macro POP_SAVE_REFS_ONLY_FRAME
49    DECREASE_FRAME 96
50.endm
51
52    /*
53     * Macro that sets up the callee save frame to conform with
54     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
55     *
56     * TODO This is probably too conservative - saving FP & LR.
57     */
58.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
    // Clobbers xIP0 (used for the runtime instance, the ArtMethod* and the SP copy).
59    // art::Runtime* xIP0 = art::Runtime::instance_;
60    // Our registers aren't intermixed - just spill in order.
61    LOAD_RUNTIME_INSTANCE xIP0
62
63    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsAndArgs];
64    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
65
    // Allocate the frame, then let the shared helper spill registers relative to sp.
66    INCREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS
67    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL sp
68
69    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsAndArgs].
70    // Place sp in Thread::Current()->top_quick_frame.
    // SP is copied through xIP0 because it cannot be the data register of a str.
71    mov xIP0, sp
72    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
73.endm
74
// Variant of SETUP_SAVE_REFS_AND_ARGS_FRAME that stores the ArtMethod* already
// held in x0 into the bottom slot instead of loading the runtime callee-save
// method. Clobbers xIP0.
75.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
76    INCREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS
77    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL sp
78    str x0, [sp, #0]  // Store ArtMethod* to bottom of stack.
79    // Place sp in Thread::Current()->top_quick_frame.
80    mov xIP0, sp
81    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
82.endm
83
84    /*
85     * Macro that sets up the callee save frame to conform with
86     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
87     * when the SP has already been decremented by FRAME_SIZE_SAVE_EVERYTHING
88     * and saving registers x29 and LR is handled elsewhere.
89     */
90.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR \
91        runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    // Slots written here, relative to sp:
    //   [0]        ArtMethod* loaded from the runtime at runtime_method_offset
    //   [16..271]  d0-d31
    //   [272..495] x0-x17 and x19-x28 (x18 has no slot)
    // Offset 8 is not written by this macro, and the x29/LR pair at 496 is left
    // to the caller. Clobbers xIP0.
92    // Ugly compile-time check, but we only have the preprocessor.
93#if (FRAME_SIZE_SAVE_EVERYTHING != 512)
94#error "FRAME_SIZE_SAVE_EVERYTHING(ARM64) size not as expected."
95#endif
96
97    // Save FP registers.
98    stp d0, d1,   [sp, #16]
99    stp d2, d3,   [sp, #32]
100    stp d4, d5,   [sp, #48]
101    stp d6, d7,   [sp, #64]
102    stp d8, d9,   [sp, #80]
103    stp d10, d11, [sp, #96]
104    stp d12, d13, [sp, #112]
105    stp d14, d15, [sp, #128]
106    stp d16, d17, [sp, #144]
107    stp d18, d19, [sp, #160]
108    stp d20, d21, [sp, #176]
109    stp d22, d23, [sp, #192]
110    stp d24, d25, [sp, #208]
111    stp d26, d27, [sp, #224]
112    stp d28, d29, [sp, #240]
113    stp d30, d31, [sp, #256]
114
115    // Save core registers.
116    SAVE_TWO_REGS  x0,  x1, 272
117    SAVE_TWO_REGS  x2,  x3, 288
118    SAVE_TWO_REGS  x4,  x5, 304
119    SAVE_TWO_REGS  x6,  x7, 320
120    SAVE_TWO_REGS  x8,  x9, 336
121    SAVE_TWO_REGS x10, x11, 352
122    SAVE_TWO_REGS x12, x13, 368
123    SAVE_TWO_REGS x14, x15, 384
124    SAVE_TWO_REGS x16, x17, 400 // x18 (platform register) is skipped: the next pair starts at x19.
125    SAVE_TWO_REGS x19, x20, 416
126    SAVE_TWO_REGS x21, x22, 432
127    SAVE_TWO_REGS x23, x24, 448
128    SAVE_TWO_REGS x25, x26, 464
129    SAVE_TWO_REGS x27, x28, 480
130
    // xIP0 (x16) was saved above, so it is free to use as scratch from here on.
131    // art::Runtime* xIP0 = art::Runtime::instance_;
132    LOAD_RUNTIME_INSTANCE xIP0
133
134    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveEverything];
135    ldr xIP0, [xIP0, \runtime_method_offset]
136
137    // Store ArtMethod* Runtime::callee_save_methods_[kSaveEverything].
138    str xIP0, [sp]
139    // Place sp in Thread::Current()->top_quick_frame.
140    mov xIP0, sp
141    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
142.endm
143
144    /*
145     * Macro that sets up the callee save frame to conform with
146     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
147     */
148.macro SETUP_SAVE_EVERYTHING_FRAME runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    // Allocate the full 512-byte frame (the size the helper below asserts for
    // FRAME_SIZE_SAVE_EVERYTHING), save x29/LR in the top pair of slots, then
    // delegate all remaining spills to the helper macro.
149    INCREASE_FRAME 512
150    SAVE_TWO_REGS x29, xLR, 496
151    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR \runtime_method_offset
152.endm
153
// Tears down a save-everything frame while leaving the current value of x0
// untouched: the slot at 272 (where x0 was saved) is not reloaded. Every other
// saved FP and core register, including x29/LR, is reloaded, then the 512-byte
// frame is popped.
154.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
155    // Restore FP registers.
156    ldp d0, d1,   [sp, #16]
157    ldp d2, d3,   [sp, #32]
158    ldp d4, d5,   [sp, #48]
159    ldp d6, d7,   [sp, #64]
160    ldp d8, d9,   [sp, #80]
161    ldp d10, d11, [sp, #96]
162    ldp d12, d13, [sp, #112]
163    ldp d14, d15, [sp, #128]
164    ldp d16, d17, [sp, #144]
165    ldp d18, d19, [sp, #160]
166    ldp d20, d21, [sp, #176]
167    ldp d22, d23, [sp, #192]
168    ldp d24, d25, [sp, #208]
169    ldp d26, d27, [sp, #224]
170    ldp d28, d29, [sp, #240]
171    ldp d30, d31, [sp, #256]
172
173    // Restore core registers, except x0.
    // x1 is reloaded alone from 280, the upper half of the x0/x1 pair saved at 272.
174    RESTORE_REG            x1, 280
175    RESTORE_TWO_REGS  x2,  x3, 288
176    RESTORE_TWO_REGS  x4,  x5, 304
177    RESTORE_TWO_REGS  x6,  x7, 320
178    RESTORE_TWO_REGS  x8,  x9, 336
179    RESTORE_TWO_REGS x10, x11, 352
180    RESTORE_TWO_REGS x12, x13, 368
181    RESTORE_TWO_REGS x14, x15, 384
182    RESTORE_TWO_REGS x16, x17, 400 // x18 (platform register) has no slot and is not touched.
183    RESTORE_TWO_REGS x19, x20, 416
184    RESTORE_TWO_REGS x21, x22, 432
185    RESTORE_TWO_REGS x23, x24, 448
186    RESTORE_TWO_REGS x25, x26, 464
187    RESTORE_TWO_REGS x27, x28, 480
188    RESTORE_TWO_REGS x29, xLR, 496
189
190    DECREASE_FRAME 512
191.endm
192
// Full teardown of a save-everything frame: reloads x0 from its slot at 272
// first, then reuses the KEEP_X0 variant for every other register and the
// frame pop.
193.macro RESTORE_SAVE_EVERYTHING_FRAME
194    RESTORE_REG  x0, 272
195    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
196.endm
197
// Function epilogue: if the thread has a pending exception, deliver it;
// otherwise return to the caller, going through the deoptimization check
// first. Clobbers x1 (exception field, then reused as the DEOPT_OR_RETURN temp).
198.macro RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
199    ldr x1, [xSELF, # THREAD_EXCEPTION_OFFSET]  // Get exception field.
200    cbnz x1, 1f
201    DEOPT_OR_RETURN x1                         // Check if deopt is required
2021:                                             // deliver exception on current thread
203    DELIVER_PENDING_EXCEPTION
204.endm
205
// Returns to the caller, first calling artDeoptimizeIfNeeded when the thread
// has its deopt-check-required field set.
//   temp   - scratch register, overwritten with the deopt-check field.
//   is_ref - value passed to the runtime in x2 (defaults to 0).
// Uses local label 2, so it must not be expanded between a branch to 2f/2b
// and its intended target.
206.macro DEOPT_OR_RETURN temp, is_ref = 0
207  ldr \temp, [xSELF, #THREAD_DEOPT_CHECK_REQUIRED_OFFSET]
208  cbnz \temp, 2f
209  ret
2102:
  // Slow path: build a save-everything frame and call
  // artDeoptimizeIfNeeded(Thread*, result, is_ref).
211  SETUP_SAVE_EVERYTHING_FRAME
212  mov x2, \is_ref                   // pass if result is a reference
213  mov x1, x0                        // pass the result
214  mov x0, xSELF                     // Thread::Current
215  bl artDeoptimizeIfNeeded
216  CFI_REMEMBER_STATE
217  RESTORE_SAVE_EVERYTHING_FRAME
218  REFRESH_MARKING_REGISTER
219  ret
  // NOTE(review): the CFI state re-established here describes a live
  // save-everything frame, yet users in this file place a label that is reached
  // without any frame directly after this macro - confirm this is intended.
220  CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
221.endm
222
// Epilogue for code that already has a save-everything frame and a result in x0.
//   temp   - scratch register, overwritten with the deopt-check field.
//   is_ref - value passed to the runtime in x2.
// Fast path: tear the frame down keeping x0 and return.
// Slow path: write x0 into the frame, call artDeoptimizeIfNeeded, then do a
// full restore (x0 is reloaded from the frame) and return.
223.macro DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_X0 temp, is_ref
224  ldr \temp, [xSELF, #THREAD_DEOPT_CHECK_REQUIRED_OFFSET]
225  cbnz \temp, 2f
226  CFI_REMEMBER_STATE
227  RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
228  REFRESH_MARKING_REGISTER
229  ret
  // Label 2 is reached with the frame still in place, so restore that CFI state.
230  CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
2312:
232  str x0, [sp, #SAVE_EVERYTHING_FRAME_X0_OFFSET] // update result in the frame
233  mov x2, \is_ref                                // pass if result is a reference
234  mov x1, x0                                     // pass the result
235  mov x0, xSELF                                  // Thread::Current
236  bl artDeoptimizeIfNeeded
237  CFI_REMEMBER_STATE
238  RESTORE_SAVE_EVERYTHING_FRAME
239  REFRESH_MARKING_REGISTER
240  ret
241  CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
242.endm
243
244
// Epilogue keyed on w0: zero takes the return path (with the deoptimization
// check, using x1 as scratch); any non-zero value branches to pending-exception
// delivery.
245.macro RETURN_IF_W0_IS_ZERO_OR_DELIVER
246    cbnz w0, 1f                // result non-zero branch over
247    DEOPT_OR_RETURN x1
2481:
249    DELIVER_PENDING_EXCEPTION
250.endm
251
// Defines entrypoint c_name, which builds a save-all-callee-saves frame and
// calls cxx_name(Thread*). The brk after the call traps if the callee ever
// returns.
252.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
253    .extern \cxx_name
254ENTRY \c_name
255    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
256    mov x0, xSELF                     // pass Thread::Current
257    bl  \cxx_name                     // \cxx_name(Thread*)
258    brk 0
259END \c_name
260.endm
261
// Same shape as NO_ARG_RUNTIME_EXCEPTION but builds a save-everything frame
// before calling cxx_name(Thread*). The brk traps if the callee ever returns.
262.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
263    .extern \cxx_name
264ENTRY \c_name
265    SETUP_SAVE_EVERYTHING_FRAME       // save all registers as basis for long jump context
266    mov x0, xSELF                     // pass Thread::Current
267    bl  \cxx_name                     // \cxx_name(Thread*)
268    brk 0
269END \c_name
270.endm
271
// Defines entrypoint c_name for a one-argument throw helper: the argument is
// whatever the caller left in x0 (this macro does not write x0), and
// Thread::Current is appended in x1. The brk traps if the callee ever returns.
272.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
273    .extern \cxx_name
274ENTRY \c_name
275    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context.
276    mov x1, xSELF                     // pass Thread::Current.
277    bl  \cxx_name                     // \cxx_name(arg, Thread*).
278    brk 0
279END \c_name
280.endm
281
// Defines entrypoint c_name for a two-argument throw helper: the arguments are
// whatever the caller left in x0 and x1 (this macro does not write them), and
// Thread::Current is appended in x2. Uses a save-everything frame. The brk
// traps if the callee ever returns.
282.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
283    .extern \cxx_name
284ENTRY \c_name
285    SETUP_SAVE_EVERYTHING_FRAME       // save all registers as basis for long jump context
286    mov x2, xSELF                     // pass Thread::Current
287    bl  \cxx_name                     // \cxx_name(arg1, arg2, Thread*)
288    brk 0
289END \c_name
290.endm
291
292    /*
293     * Called by managed code, saves callee saves and then calls artDeliverExceptionFromCode
294     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
295     */
296ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode
297
298    /*
299     * Called by managed code to create and deliver a NullPointerException.
300     */
// Expands to an entrypoint that builds a save-everything frame and calls
// artThrowNullPointerExceptionFromCode(Thread*).
301NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING \
302        art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
303
304    /*
305     * Call installed by a signal handler to create and deliver a NullPointerException.
306     */
307    .extern art_quick_throw_null_pointer_exception_from_signal
308ENTRY art_quick_throw_null_pointer_exception_from_signal
309    // The fault handler pushes the gc map address, i.e. "return address", to stack
310    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
311    .cfi_def_cfa_offset __SIZEOF_POINTER__
312    .cfi_rel_offset lr, 0
313    // Save all registers as basis for long jump context.
    // One pointer-sized slot is already on the stack, so the frame grows by one
    // pointer less than FRAME_SIZE_SAVE_EVERYTHING, and only x29 is stored in the
    // slot just below that pre-pushed word.
314    INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - __SIZEOF_POINTER__)
315    SAVE_REG x29, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)  // LR already saved.
316    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
317    mov x0, lr                        // pass the fault address stored in LR by the fault handler.
318    mov x1, xSELF                     // pass Thread::Current.
319    bl  artThrowNullPointerExceptionFromSignal  // (arg, Thread*).
    // Traps if the call above ever returns.
320    brk 0
321END art_quick_throw_null_pointer_exception_from_signal
322
323    /*
324     * Called by managed code to create and deliver an ArithmeticException.
325     */
// Expands to an entrypoint that builds a save-everything frame and calls
// artThrowDivZeroFromCode(Thread*).
326NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode
327
328    /*
329     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
330     * index, arg2 holds limit.
331     */
// Expands to an entrypoint that builds a save-everything frame and calls
// artThrowArrayBoundsFromCode(arg1, arg2, Thread*) with x0/x1 passed through.
332TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
333
334    /*
335     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
336     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
337     */
// Expands to an entrypoint that builds a save-everything frame and calls
// artThrowStringBoundsFromCode(arg1, arg2, Thread*) with x0/x1 passed through.
338TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING \
339        art_quick_throw_string_bounds, artThrowStringBoundsFromCode
340
341    /*
342     * Called by managed code to create and deliver a StackOverflowError.
343     */
// Unlike the throw stubs above, this one uses NO_ARG_RUNTIME_EXCEPTION, i.e. a
// save-all-callee-saves frame rather than a save-everything frame.
344NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
345
346    /*
347     * All generated callsites for interface invokes and invocation slow paths will load arguments
348     * as usual - except instead of loading arg0/x0 with the target Method*, arg0/x0 will contain
349     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
350     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/x1.
351     *
352     * The helper will attempt to locate the target and return a 128-bit result in x0/x1 consisting
353     * of the target Method* in x0 and method->code_ in x1.
354     *
355     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
356     * thread and we branch to another stub to deliver it.
357     *
358     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
359     * pointing back to the original caller.
360     *
361     * Adapted from ARM32 code.
362     *
363     * Clobbers xIP0.
364     */
365.macro INVOKE_TRAMPOLINE_BODY cxx_name
366    .extern \cxx_name
367    SETUP_SAVE_REFS_AND_ARGS_FRAME        // save callee saves in case allocation triggers GC
368    // Helper signature is
369    // (method_idx, this_object, Thread* self, sp); x0 and x1 are not written here before the call.
370
371    mov    x2, xSELF                      // pass Thread::Current
372    mov    x3, sp
373    bl     \cxx_name                      // (method_idx, this, Thread*, SP)
    // x1 must be copied out before the frame restore reloads the argument registers.
374    mov    xIP0, x1                       // save Method*->code_
375    RESTORE_SAVE_REFS_AND_ARGS_FRAME
376    REFRESH_MARKING_REGISTER
377    cbz    x0, 1f                         // did we find the target? if not go to exception delivery
378    br     xIP0                           // tail call to target
3791:
380    DELIVER_PENDING_EXCEPTION
381.endm
// Wraps INVOKE_TRAMPOLINE_BODY in an ENTRY/END pair, defining entrypoint c_name
// that dispatches through the C++ helper cxx_name.
382.macro INVOKE_TRAMPOLINE c_name, cxx_name
383ENTRY \c_name
384    INVOKE_TRAMPOLINE_BODY \cxx_name
385END \c_name
386.endm
387
// One trampoline per invoke kind (interface, static, direct, super, virtual),
// each bound to its artInvoke*TrampolineWithAccessCheck helper.
388INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, \
389                  artInvokeInterfaceTrampolineWithAccessCheck
390
391INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, \
392                  artInvokeStaticTrampolineWithAccessCheck
393INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, \
394                  artInvokeDirectTrampolineWithAccessCheck
395INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, \
396                  artInvokeSuperTrampolineWithAccessCheck
397INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, \
398                  artInvokeVirtualTrampolineWithAccessCheck
399
400
// Prologue shared by the invoke stubs. Saves x4, x5, x19-x21, FP and LR in a
// 64-byte area, makes xFP point at that area, reserves an aligned region below
// it for a null ArtMethod* plus the outgoing arguments, and copies the
// arguments there.
// In:  x1 = source args, w2 = args length in bytes, x3 = Thread*.
// Out: xSELF = x3, x9 = sp + 8 (start of copied args), [sp] = null, w2 = 0.
// Clobbers x10.
401.macro INVOKE_STUB_CREATE_FRAME
402SAVE_SIZE=8*8   // x4, x5, <padding>, x19, x20, x21, FP, LR saved.
403    SAVE_TWO_REGS_INCREASE_FRAME x4, x5, SAVE_SIZE
404    SAVE_REG      x19,      24
405    SAVE_TWO_REGS x20, x21, 32
406    SAVE_TWO_REGS xFP, xLR, 48
407
408    mov xFP, sp                            // Use xFP for frame pointer, as it's callee-saved.
409    .cfi_def_cfa_register xFP
410
    // NOTE(review): the add below reads all 64 bits of x2 although the length is
    // documented as w2 - confirm callers pass it zero-extended.
411    add x10, x2, #(__SIZEOF_POINTER__ + 0xf) // Reserve space for ArtMethod*, arguments and
412    and x10, x10, # ~0xf                   // round up for 16-byte stack alignment.
413    sub sp, sp, x10                        // Adjust SP for ArtMethod*, args and alignment padding.
414
415    mov xSELF, x3                          // Move thread pointer into SELF register.
416
417    // Copy arguments into stack frame.
418    // Use simple copy routine for now.
419    // 4 bytes per slot.
420    // X1 - source address
421    // W2 - args length
422    // X9 - destination address.
423    // W10 - temporary
424    add x9, sp, #8                         // Destination address is bottom of stack + null.
425
426    // Copy parameters into the stack. Use numeric label as this is a macro and Clang's assembler
427    // does not have unique-id variables.
    // The loop copies from the highest slot down and stops only when w2 reaches
    // exactly 0, so the length is assumed to be a multiple of 4.
428    cbz w2, 2f
4291:
430    sub w2, w2, #4      // Need 65536 bytes of range.
431    ldr w10, [x1, x2]
432    str w10, [x9, x2]
433    cbnz w2, 1b
434
4352:
436    // Store null into ArtMethod* at bottom of frame.
437    str xzr, [sp]
438.endm
439
// Epilogue shared by the invoke stubs: calls the quick code of the method in
// x0, unwinds the frame built by INVOKE_STUB_CREATE_FRAME (relies on the
// SAVE_SIZE symbol set there), and stores the result through the restored x4
// according to the first shorty character read through the restored x5.
// Contains three ret instructions; nothing placed after the macro is reached
// by fall-through.
440.macro INVOKE_STUB_CALL_AND_RETURN
441
442    REFRESH_MARKING_REGISTER
443    REFRESH_SUSPEND_CHECK_REGISTER
444
445    // load method-> METHOD_QUICK_CODE_OFFSET
446    ldr x9, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
447    // Branch to method.
448    blr x9
449
450    // Pop the ArtMethod* (null), arguments and alignment padding from the stack.
451    mov sp, xFP
452    .cfi_def_cfa_register sp
453
454    // Restore saved registers including value address and shorty address.
455    RESTORE_REG      x19,      24
456    RESTORE_TWO_REGS x20, x21, 32
457    RESTORE_TWO_REGS xFP, xLR, 48
458    RESTORE_TWO_REGS_DECREASE_FRAME x4, x5, SAVE_SIZE
459
460    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
    // The first shorty character is the return type.
461    ldrb w10, [x5]
462
463    // Check the return type and store the correct register into the jvalue in memory.
464    // Use numeric label as this is a macro and Clang's assembler does not have unique-id variables.
465
466    // Don't set anything for a void type.
467    cmp w10, #'V'
468    beq 1f
469
470    // Is it a double?
471    cmp w10, #'D'
472    beq 2f
473
474    // Is it a float?
475    cmp w10, #'F'
476    beq 3f
477
478    // Just store x0. Doesn't matter if it is 64 or 32 bits.
479    str x0, [x4]
480
4811:  // Finish up.
482    ret
483
4842:  // Store double.
485    str d0, [x4]
486    ret
487
4883:  // Store float.
489    str s0, [x4]
490    ret
491
492.endm
493
494
495// Macro for loading an argument into a register.
496//  label - the base name of the label of the load routine,
497//  reg - the register to load,
498//  args - pointer to current argument, incremented by size,
499//  size - the size of the register - 4 or 8 bytes,
500//  nh4_reg - the register to fill with the address of the next handler for 4-byte values,
501//  nh4_l - the base name of the label of the next handler for 4-byte values,
502//  nh8_reg - the register to fill with the address of the next handler for 8-byte values,
503//  nh8_l - the base name of the label of the next handler for 8-byte values,
504//  cont - the base name of the label for continuing the shorty processing loop,
505//  suffix - suffix added to all labels to make labels unique for different users.
506.macro INVOKE_STUB_LOAD_REG label, reg, args, size, nh4_reg, nh4_l, nh8_reg, nh8_l, cont, suffix
507\label\suffix:
    // Load the argument with post-increment, then advance both handler pointers
    // of this register class to the next register before resuming the loop.
508    ldr \reg, [\args], #\size
509    adr \nh4_reg, \nh4_l\suffix
510    adr \nh8_reg, \nh8_l\suffix
511    b \cont\suffix
512.endm
513
514// Macro for skipping an argument that does not fit into argument registers.
515//  label - the base name of the label of the skip routine,
516//  args - pointer to current argument, incremented by size,
517//  size - the size of the argument - 4 or 8 bytes,
518//  cont - the base name of the label for continuing the shorty processing loop,
519//  suffix - suffix added to all labels to make labels unique for different users.
520.macro INVOKE_STUB_SKIP_ARG label, args, size, cont, suffix
521\label\suffix:
    // No register is loaded and no handler pointer changes: only the argument
    // cursor moves past the value.
522    add \args, \args, #\size
523    b \cont\suffix
524.endm
525
526// Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
527// Parse the passed shorty to determine which register to load.
528//  x5 - shorty,
529//  x9 - points to arguments on the stack,
530//  suffix - suffix added to all labels to make labels unique for different users.
531.macro INVOKE_STUB_LOAD_ALL_ARGS suffix
    // Register roles inside this macro:
    //   x10 - cursor into the shorty (starts after the return-type character)
    //   x11 / x12 - next handler for a 4-byte / 8-byte core-register argument
    //   x13 / x14 - next handler for a float / double argument
    //   w17 - current shorty character
    // Each handler loads one argument and advances the handler pointers of its
    // class, so dispatch is a single indirect branch per argument.
532    add x10, x5, #1                 // Load shorty address, plus one to skip the return type.
533
534    // Load this (if instance method) and addresses for routines that load WXSD registers.
535    .ifc \suffix, _instance
536        ldr w1, [x9], #4            // Load "this" parameter, and increment arg pointer.
537        adr x11, .Lload_w2\suffix
538        adr x12, .Lload_x2\suffix
539    .else
540        adr x11, .Lload_w1\suffix
541        adr x12, .Lload_x1\suffix
542    .endif
543    adr  x13, .Lload_s0\suffix
544    adr  x14, .Lload_d0\suffix
545
546    // Loop to fill registers.
547.Lfill_regs\suffix:
548    ldrb w17, [x10], #1             // Load next character in signature, and increment.
549    cbz w17, .Lcall_method\suffix   // Exit at end of signature. Shorty 0 terminated.
550
551    cmp w17, #'J'                   // Is this a long?
552    beq .Lload_long\suffix
553
554    cmp  w17, #'F'                  // Is this a float?
555    beq .Lload_float\suffix
556
557    cmp w17, #'D'                   // Is this a double?
558    beq .Lload_double\suffix
559
560    // Everything else uses a 4-byte GPR.
561    br x11
562
563.Lload_long\suffix:
564    br x12
565
566.Lload_float\suffix:
567    br x13
568
569.Lload_double\suffix:
570    br x14
571
572// Handlers for loading other args (not float/double/long) into W registers.
    // The w1/x1 handlers exist only for the non-instance expansion; in the
    // _instance expansion w1 was already loaded with "this" above.
573    .ifnc \suffix, _instance
574        INVOKE_STUB_LOAD_REG \
575            .Lload_w1, w1, x9, 4, x11, .Lload_w2, x12, .Lload_x2, .Lfill_regs, \suffix
576    .endif
577    INVOKE_STUB_LOAD_REG .Lload_w2, w2, x9, 4, x11, .Lload_w3, x12, .Lload_x3, .Lfill_regs, \suffix
578    INVOKE_STUB_LOAD_REG .Lload_w3, w3, x9, 4, x11, .Lload_w4, x12, .Lload_x4, .Lfill_regs, \suffix
579    INVOKE_STUB_LOAD_REG .Lload_w4, w4, x9, 4, x11, .Lload_w5, x12, .Lload_x5, .Lfill_regs, \suffix
580    INVOKE_STUB_LOAD_REG .Lload_w5, w5, x9, 4, x11, .Lload_w6, x12, .Lload_x6, .Lfill_regs, \suffix
581    INVOKE_STUB_LOAD_REG .Lload_w6, w6, x9, 4, x11, .Lload_w7, x12, .Lload_x7, .Lfill_regs, \suffix
    // After register 7 of a class is used, both of its handlers become the skip routines.
582    INVOKE_STUB_LOAD_REG .Lload_w7, w7, x9, 4, x11, .Lskip4, x12, .Lskip8, .Lfill_regs, \suffix
583
584// Handlers for loading longs into X registers.
585    .ifnc \suffix, _instance
586        INVOKE_STUB_LOAD_REG \
587            .Lload_x1, x1, x9, 8, x11, .Lload_w2, x12, .Lload_x2, .Lfill_regs, \suffix
588    .endif
589    INVOKE_STUB_LOAD_REG .Lload_x2, x2, x9, 8, x11, .Lload_w3, x12, .Lload_x3, .Lfill_regs, \suffix
590    INVOKE_STUB_LOAD_REG .Lload_x3, x3, x9, 8, x11, .Lload_w4, x12, .Lload_x4, .Lfill_regs, \suffix
591    INVOKE_STUB_LOAD_REG .Lload_x4, x4, x9, 8, x11, .Lload_w5, x12, .Lload_x5, .Lfill_regs, \suffix
592    INVOKE_STUB_LOAD_REG .Lload_x5, x5, x9, 8, x11, .Lload_w6, x12, .Lload_x6, .Lfill_regs, \suffix
593    INVOKE_STUB_LOAD_REG .Lload_x6, x6, x9, 8, x11, .Lload_w7, x12, .Lload_x7, .Lfill_regs, \suffix
594    INVOKE_STUB_LOAD_REG .Lload_x7, x7, x9, 8, x11, .Lskip4, x12, .Lskip8, .Lfill_regs, \suffix
595
596// Handlers for loading singles into S registers.
597    INVOKE_STUB_LOAD_REG .Lload_s0, s0, x9, 4, x13, .Lload_s1, x14, .Lload_d1, .Lfill_regs, \suffix
598    INVOKE_STUB_LOAD_REG .Lload_s1, s1, x9, 4, x13, .Lload_s2, x14, .Lload_d2, .Lfill_regs, \suffix
599    INVOKE_STUB_LOAD_REG .Lload_s2, s2, x9, 4, x13, .Lload_s3, x14, .Lload_d3, .Lfill_regs, \suffix
600    INVOKE_STUB_LOAD_REG .Lload_s3, s3, x9, 4, x13, .Lload_s4, x14, .Lload_d4, .Lfill_regs, \suffix
601    INVOKE_STUB_LOAD_REG .Lload_s4, s4, x9, 4, x13, .Lload_s5, x14, .Lload_d5, .Lfill_regs, \suffix
602    INVOKE_STUB_LOAD_REG .Lload_s5, s5, x9, 4, x13, .Lload_s6, x14, .Lload_d6, .Lfill_regs, \suffix
603    INVOKE_STUB_LOAD_REG .Lload_s6, s6, x9, 4, x13, .Lload_s7, x14, .Lload_d7, .Lfill_regs, \suffix
604    INVOKE_STUB_LOAD_REG .Lload_s7, s7, x9, 4, x13, .Lskip4, x14, .Lskip8, .Lfill_regs, \suffix
605
606// Handlers for loading doubles into D registers.
607    INVOKE_STUB_LOAD_REG .Lload_d0, d0, x9, 8, x13, .Lload_s1, x14, .Lload_d1, .Lfill_regs, \suffix
608    INVOKE_STUB_LOAD_REG .Lload_d1, d1, x9, 8, x13, .Lload_s2, x14, .Lload_d2, .Lfill_regs, \suffix
609    INVOKE_STUB_LOAD_REG .Lload_d2, d2, x9, 8, x13, .Lload_s3, x14, .Lload_d3, .Lfill_regs, \suffix
610    INVOKE_STUB_LOAD_REG .Lload_d3, d3, x9, 8, x13, .Lload_s4, x14, .Lload_d4, .Lfill_regs, \suffix
611    INVOKE_STUB_LOAD_REG .Lload_d4, d4, x9, 8, x13, .Lload_s5, x14, .Lload_d5, .Lfill_regs, \suffix
612    INVOKE_STUB_LOAD_REG .Lload_d5, d5, x9, 8, x13, .Lload_s6, x14, .Lload_d6, .Lfill_regs, \suffix
613    INVOKE_STUB_LOAD_REG .Lload_d6, d6, x9, 8, x13, .Lload_s7, x14, .Lload_d7, .Lfill_regs, \suffix
614    INVOKE_STUB_LOAD_REG .Lload_d7, d7, x9, 8, x13, .Lskip4, x14, .Lskip8, .Lfill_regs, \suffix
615
616// Handlers for skipping arguments that do not fit into registers.
617    INVOKE_STUB_SKIP_ARG .Lskip4, x9, 4, .Lfill_regs, \suffix
618    INVOKE_STUB_SKIP_ARG .Lskip8, x9, 8, .Lfill_regs, \suffix
619
    // Execution continues with whatever follows the macro expansion.
620.Lcall_method\suffix:
621.endm
622
623/*
624 *  extern"C" void art_quick_invoke_stub(ArtMethod *method,   x0
625 *                                       uint32_t  *args,     x1
626 *                                       uint32_t argsize,    w2
627 *                                       Thread *self,        x3
628 *                                       JValue *result,      x4
629 *                                       char   *shorty);     x5
630 *  +----------------------+
631 *  |                      |
632 *  |  C/C++ frame         |
633 *  |       LR''           |
634 *  |       FP''           | <- SP'
635 *  +----------------------+
636 *  +----------------------+
637 *  |        LR'           |
638 *  |        FP'           |
639 *  |        x21           |
640 *  |        x20           |
641 *  |        x19           |        Saved registers
642 *  |      <padding>       |
643 *  |        X5            |
644 *  |        X4            | <- FP
645 *  +----------------------+
646 *  | uint32_t out[n-1]    |
647 *  |    :      :          |        Outs
648 *  | uint32_t out[0]      |
649 *  | ArtMethod*           | <- SP  value=null
650 *  +----------------------+
651 *
652 * Outgoing registers:
653 *  x0    - Method*
654 *  x1-x7 - integer parameters.
655 *  d0-d7 - Floating point parameters.
656 *  xSELF = self
657 *  SP = & of ArtMethod*
658 *  x1 = "this" pointer.
659 *
660 */
661ENTRY art_quick_invoke_stub
662    // Spill registers as per AAPCS64 calling convention.
663    INVOKE_STUB_CREATE_FRAME
664
665    // Load args into registers.
    // The _instance suffix makes the loader take "this" into w1 first.
666    INVOKE_STUB_LOAD_ALL_ARGS _instance
667
668    // Call the method and return.
669    INVOKE_STUB_CALL_AND_RETURN
670END art_quick_invoke_stub
671
672/*  extern"C"
673 *     void art_quick_invoke_static_stub(ArtMethod *method,   x0
674 *                                       uint32_t  *args,     x1
675 *                                       uint32_t argsize,    w2
676 *                                       Thread *self,        x3
677 *                                       JValue *result,      x4
678 *                                       char   *shorty);     x5
679 */
680ENTRY art_quick_invoke_static_stub
681    // Spill registers as per AAPCS64 calling convention.
682    INVOKE_STUB_CREATE_FRAME
683
684    // Load args into registers.
    // With the _static suffix there is no implicit "this": the first argument
    // goes to w1/x1 through the regular handlers.
685    INVOKE_STUB_LOAD_ALL_ARGS _static
686
687    // Call the method and return.
688    INVOKE_STUB_CALL_AND_RETURN
689END art_quick_invoke_static_stub
690
691
692
693/*  extern"C" void art_quick_osr_stub(void** stack,                x0
694 *                                    size_t stack_size_in_bytes,  x1
695 *                                    const uint8_t* native_pc,    x2
696 *                                    JValue *result,              x3
697 *                                    char   *shorty,              x4
698 *                                    Thread *self)                x5
699 */
700ENTRY art_quick_osr_stub
    // Save area (22 slots): x3/x4 at 0, x19-x28 at 16..95, FP/LR at 96,
    // d8-d15 at 112..175.
701    SAVE_SIZE=22*8
702    SAVE_TWO_REGS_INCREASE_FRAME x3, x4, SAVE_SIZE
703    SAVE_TWO_REGS x19, x20, 16
704    SAVE_TWO_REGS x21, x22, 32
705    SAVE_TWO_REGS x23, x24, 48
706    SAVE_TWO_REGS x25, x26, 64
707    SAVE_TWO_REGS x27, x28, 80
708    SAVE_TWO_REGS xFP, xLR, 96
709    stp d8, d9,   [sp, #112]
710    stp d10, d11, [sp, #128]
711    stp d12, d13, [sp, #144]
712    stp d14, d15, [sp, #160]
713
714    mov xSELF, x5                         // Move thread pointer into SELF register.
715    REFRESH_MARKING_REGISTER
716    REFRESH_SUSPEND_CHECK_REGISTER
717
718    INCREASE_FRAME 16
719    str xzr, [sp]                         // Store null for ArtMethod* slot
720    // Branch to stub.
    // bl sets LR to the next instruction; .Losr_entry stores that LR into the
    // frame it builds for the OSR code.
721    bl .Losr_entry
722    CFI_REMEMBER_STATE
723    DECREASE_FRAME 16
724
725    // Restore saved registers including value address and shorty address.
726    ldp d8, d9,   [sp, #112]
727    ldp d10, d11, [sp, #128]
728    ldp d12, d13, [sp, #144]
729    ldp d14, d15, [sp, #160]
730    RESTORE_TWO_REGS x19, x20, 16
731    RESTORE_TWO_REGS x21, x22, 32
732    RESTORE_TWO_REGS x23, x24, 48
733    RESTORE_TWO_REGS x25, x26, 64
734    RESTORE_TWO_REGS x27, x28, 80
735    RESTORE_TWO_REGS xFP, xLR, 96
736    RESTORE_TWO_REGS_DECREASE_FRAME x3, x4, SAVE_SIZE
737
738    // The compiler put the result in x0. Doesn't matter if it is 64 or 32 bits.
739    str x0, [x3]
740    ret
741
742.Losr_entry:
743    CFI_RESTORE_STATE_AND_DEF_CFA sp, (SAVE_SIZE+16)
744
745    mov x9, sp                             // Save stack pointer.
746    .cfi_def_cfa_register x9
747
748    // Update stack pointer for the callee
749    sub sp, sp, x1
750
751    // Update link register slot expected by the callee.
    // After the subtraction w1 is (stack size - 8): LR goes into the top 8 bytes
    // of the new frame, and w1 is then the number of bytes left to copy.
752    sub w1, w1, #8
753    str lr, [sp, x1]
754
755    // Copy arguments into stack frame.
756    // Use simple copy routine for now.
757    // 4 bytes per slot.
758    // X0 - source address
759    // W1 - args length
760    // SP - destination address.
761    // W10 - temporary
762.Losr_loop_entry:
763    cbz w1, .Losr_loop_exit
764    sub w1, w1, #4
765    ldr w10, [x0, x1]
766    str w10, [sp, x1]
767    b .Losr_loop_entry
768
769.Losr_loop_exit:
770    // Branch to the OSR entry point.
771    br x2
772
773END art_quick_osr_stub
774
775    /*
776     * On entry x0 is uintptr_t* gprs_ and x1 is uint64_t* fprs_.
777     * Both must reside on the stack, between current SP and target SP.
778     * IP0 and IP1 shall be clobbered rather than retrieved from gprs_.
779     */
780
781ENTRY art_quick_do_long_jump
    // Does not return: loads the whole register context from the two arrays and
    // finishes with a branch to the PC stored at gprs_[33].
782    // Load FPRs
783    ldp d0, d1, [x1, #0]
784    ldp d2, d3, [x1, #16]
785    ldp d4, d5, [x1, #32]
786    ldp d6, d7, [x1, #48]
787    ldp d8, d9, [x1, #64]
788    ldp d10, d11, [x1, #80]
789    ldp d12, d13, [x1, #96]
790    ldp d14, d15, [x1, #112]
791    ldp d16, d17, [x1, #128]
792    ldp d18, d19, [x1, #144]
793    ldp d20, d21, [x1, #160]
794    ldp d22, d23, [x1, #176]
795    ldp d24, d25, [x1, #192]
796    ldp d26, d27, [x1, #208]
797    ldp d28, d29, [x1, #224]
798    ldp d30, d31, [x1, #240]
799
800    // Load GPRs. Delay loading x0, x1 because x0 is used as gprs_.
    // Register xN lives at byte offset 8*N in gprs_.
801    ldp x2, x3, [x0, #16]
802    ldp x4, x5, [x0, #32]
803    ldp x6, x7, [x0, #48]
804    ldp x8, x9, [x0, #64]
805    ldp x10, x11, [x0, #80]
806    ldp x12, x13, [x0, #96]
807    ldp x14, x15, [x0, #112]
808    // Do not load IP0 (x16) and IP1 (x17), these shall be clobbered below.
809    // Don't load the platform register (x18) either.
810    ldr      x19, [x0, #152]      // xSELF.
811    ldp x20, x21, [x0, #160]      // For Baker RB, wMR (w20) is reloaded below.
812    ldp x22, x23, [x0, #176]
813    ldp x24, x25, [x0, #192]
814    ldp x26, x27, [x0, #208]
815    ldp x28, x29, [x0, #224]
816    ldp x30, xIP0, [x0, #240]     // LR and SP, load SP to IP0.
817
818    // Load PC to IP1, it's at the end (after the space for the unused XZR).
819    ldr xIP1, [x0, #33*8]
820
821    // Load x0, x1.
    // This must be the last read through x0, since it overwrites the base pointer.
822    ldp x0, x1, [x0, #0]
823
824    // Set SP. Do not access fprs_ and gprs_ from now, they are below SP.
825    mov sp, xIP0
826
827    REFRESH_MARKING_REGISTER
828    REFRESH_SUSPEND_CHECK_REGISTER
829
830    br  xIP1
831END art_quick_do_long_jump
832
833    /*
834     * Entry from managed code that tries to lock the object in a fast path and
835     * calls `artLockObjectFromCode()` for the difficult cases, may block for GC.
836     * x0 holds the possibly null object to lock.
     *
     * The whole body is the LOCK_OBJECT_FAST_PATH macro (defined outside this chunk),
     * instantiated with `art_quick_lock_object_no_inline` as its slow path target.
837     */
838ENTRY art_quick_lock_object
839    LOCK_OBJECT_FAST_PATH x0, art_quick_lock_object_no_inline, /*can_be_null*/ 1
840END art_quick_lock_object
841
842    /*
843     * Entry from managed code that calls `artLockObjectFromCode()`, may block for GC.
844     * x0 holds the possibly null object to lock.
     *
     * x0 is forwarded unchanged as the first argument; xSELF is passed in x1.
     * The w0 result is consumed by RETURN_IF_W0_IS_ZERO_OR_DELIVER (defined outside
     * this chunk; by its name, zero returns to the caller and non-zero delivers the
     * pending exception).
845     */
846    .extern artLockObjectFromCode
847ENTRY art_quick_lock_object_no_inline
848    // This is also the slow path for art_quick_lock_object.
849    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
850    mov    x1, xSELF                  // pass Thread::Current
851    bl     artLockObjectFromCode      // (Object* obj, Thread*)
852    RESTORE_SAVE_REFS_ONLY_FRAME
853    REFRESH_MARKING_REGISTER
854    RETURN_IF_W0_IS_ZERO_OR_DELIVER
855END art_quick_lock_object_no_inline
856
857    /*
858     * Entry from managed code that tries to unlock the object in a fast path and calls
859     * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure.
860     * x0 holds the possibly null object to unlock.
     *
     * The whole body is the UNLOCK_OBJECT_FAST_PATH macro (defined outside this chunk),
     * instantiated with `art_quick_unlock_object_no_inline` as its slow path target.
861     */
862ENTRY art_quick_unlock_object
863    UNLOCK_OBJECT_FAST_PATH x0, art_quick_unlock_object_no_inline, /*can_be_null*/ 1
864END art_quick_unlock_object
865
866    /*
867     * Entry from managed code that calls `artUnlockObjectFromCode()`
868     * and delivers exception on failure.
869     * x0 holds the possibly null object to unlock.
     *
     * x0 is forwarded unchanged as the first argument; xSELF is passed in x1.
     * The w0 result is consumed by RETURN_IF_W0_IS_ZERO_OR_DELIVER (defined outside
     * this chunk; by its name, zero returns to the caller and non-zero delivers the
     * pending exception).
870     */
871    .extern artUnlockObjectFromCode
872ENTRY art_quick_unlock_object_no_inline
873    // This is also the slow path for art_quick_unlock_object.
874    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
875    mov    x1, xSELF                  // pass Thread::Current
876    bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
877    RESTORE_SAVE_REFS_ONLY_FRAME
878    REFRESH_MARKING_REGISTER
879    RETURN_IF_W0_IS_ZERO_OR_DELIVER
880END art_quick_unlock_object_no_inline
881
882    /*
883     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
884     * artThrowClassCastExceptionForObject.
     *
     * x0 and x1 are forwarded unchanged to both runtime functions; per the signature
     * noted at the throw call they are (Object*, Class*). A null x1 skips the runtime
     * check and goes straight to the throw. On the success path x0 and x1 are restored
     * to their entry values before returning.
     *
     * Temporary 32-byte frame: x0 at [sp, #0], x1 at [sp, #8], LR saved with offset 24.
885     */
886    .extern artInstanceOfFromCode
887    .extern artThrowClassCastExceptionForObject
888ENTRY art_quick_check_instance_of
889    // Type check using the bit string passes null as the target class. In that case just throw.
890    cbz x1, .Lthrow_class_cast_exception_for_bitstring_check
891
892    // Store arguments and link register
893    // Stack needs to be 16B aligned on calls.
894    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32
895    SAVE_REG xLR, 24
896
897    // Call runtime code
898    bl artInstanceOfFromCode
899
900    // Restore LR.
901    RESTORE_REG xLR, 24
902
903    // A zero result means the check failed: take the throw path.
904    cbz x0, .Lthrow_class_cast_exception
905
906    // Restore and return
    // The CFI state is remembered here and restored after `ret`, so that the unwind
    // info for the throw path below still describes the 32-byte frame.
907    CFI_REMEMBER_STATE
908    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
909    ret
910    CFI_RESTORE_STATE_AND_DEF_CFA sp, 32
911
912.Lthrow_class_cast_exception:
913    // Restore the original x0/x1 so they can be passed to the throw entrypoint.
914    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
915
916.Lthrow_class_cast_exception_for_bitstring_check:
917    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
918    mov x2, xSELF                     // pass Thread::Current
919    bl artThrowClassCastExceptionForObject     // (Object*, Class*, Thread*)
920    brk 0                             // We should not return here...
921END art_quick_check_instance_of
922
923// Restore xReg's value from [sp, #offset] if xReg is not the same as xExclude.
// The comparison is a textual one on the macro arguments (`.ifnc`), made at assembly
// time. When the names match, nothing is emitted: neither the load nor the CFI restore.
// SP is not adjusted by this macro.
924.macro POP_REG_NE xReg, offset, xExclude
925    .ifnc \xReg, \xExclude
926        ldr \xReg, [sp, #\offset]     // restore xReg
927        .cfi_restore \xReg
928    .endif
929.endm
930
931// Restore xReg1's value from [sp, #offset] if xReg1 is not the same as xExclude.
932// Restore xReg2's value from [sp, #(offset + 8)] if xReg2 is not the same as xExclude.
// The comparisons are textual ones on the macro arguments (`.ifc`), made at assembly
// time; a single `ldp` is used when neither register is excluded. SP is not adjusted.
// NOTE(review): unlike POP_REG_NE, the `.cfi_restore` directives are emitted for both
// registers even when one of them is excluded and therefore not reloaded.
933.macro POP_REGS_NE xReg1, xReg2, offset, xExclude
934    .ifc \xReg1, \xExclude
935        ldr \xReg2, [sp, #(\offset + 8)]        // restore xReg2
936    .else
937        .ifc \xReg2, \xExclude
938            ldr \xReg1, [sp, #\offset]          // restore xReg1
939        .else
940            ldp \xReg1, \xReg2, [sp, #\offset]  // restore xReg1 and xReg2
941        .endif
942    .endif
943    .cfi_restore \xReg1
944    .cfi_restore \xReg2
945.endm
946
947    // Helper macros for `art_quick_aput_obj`.
948#ifdef USE_READ_BARRIER
949#ifdef USE_BAKER_READ_BARRIER
// Loads the lock word of \xObj into wIP0 and branches to \gray_slow_path_label when bit
// LOCK_WORD_READ_BARRIER_STATE_SHIFT is set. Otherwise loads the 32-bit heap reference
// at [\xObj + \offset] into \wDest and unpoisons it. Clobbers IP0.
950.macro BAKER_RB_CHECK_GRAY_BIT_AND_LOAD wDest, xObj, offset, gray_slow_path_label
951    ldr wIP0, [\xObj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
952    tbnz wIP0, #LOCK_WORD_READ_BARRIER_STATE_SHIFT, \gray_slow_path_label
953    // False dependency to avoid needing load/load fence.
    // The 32-bit load above zero-extends into xIP0, so `xIP0, lsr #32` is zero and \xObj
    // keeps its value; the add only makes the reference load depend on the lock word load.
954    add \xObj, \xObj, xIP0, lsr #32
955    ldr \wDest, [\xObj, #\offset]                      // Heap reference = 32b; zero-extends to xN.
956    UNPOISON_HEAP_REF \wDest
957.endm
958
// Loads the 32-bit heap reference at [\xObj + \offset] into \wDest, unpoisons it and then
// calls \mark_function. LR is kept in x5 across the call, so x5 is clobbered.
959.macro BAKER_RB_LOAD_AND_MARK wDest, xObj, offset, mark_function
960    ldr \wDest, [\xObj, #\offset]                      // Heap reference = 32b; zero-extends to xN.
961    UNPOISON_HEAP_REF \wDest
962    // Save LR in a register preserved by `art_quick_read_barrier_mark_regNN`
963    // and unused by the `art_quick_aput_obj`.
964    mov x5, lr
965    bl \mark_function
966    mov lr, x5                                         // Restore LR.
967.endm
968#else  // USE_BAKER_READ_BARRIER
969    .extern artReadBarrierSlow
// Calls artReadBarrierSlow() for the reference field at \offset of \xObj and leaves the
// result in \wDest. \xDest must name the 64-bit view of \wDest: it is the one register
// among x0-x4 that is NOT reloaded from the stack afterwards. LR is always restored.
// Temporary 48-byte frame: x0, x1 at #0/#8; x2, x3 at #16/#24; x4, LR at #32/#40.
970.macro READ_BARRIER_SLOW xDest, wDest, xObj, offset
971    // Store registers used in art_quick_aput_obj (x0-x4, LR), stack is 16B aligned.
972    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 48
973    SAVE_TWO_REGS x2, x3, 16
974    SAVE_TWO_REGS x4, xLR, 32
975
976    // mov x0, \xRef                // pass ref in x0 (no-op for now since parameter ref is unused)
977    .ifnc \xObj, x1
978        mov x1, \xObj               // pass xObj
979    .endif
980    mov w2, #\offset                // pass offset
981    bl artReadBarrierSlow           // artReadBarrierSlow(ref, xObj, offset)
982    // No need to unpoison return value in w0, artReadBarrierSlow() would do the unpoisoning.
983    .ifnc \wDest, w0
984        mov \wDest, w0              // save return value in wDest
985    .endif
986
987    // Conditionally restore saved registers
    // (every register of x0-x4 except \xDest, which now holds the result).
988    POP_REG_NE x0, 0, \xDest
989    POP_REG_NE x1, 8, \xDest
990    POP_REG_NE x2, 16, \xDest
991    POP_REG_NE x3, 24, \xDest
992    POP_REG_NE x4, 32, \xDest
993    RESTORE_REG xLR, 40
994    DECREASE_FRAME 48
995.endm
996#endif // USE_BAKER_READ_BARRIER
997#endif  // USE_READ_BARRIER
998
999ENTRY art_quick_aput_obj
    // Stores a reference into an object array, with a type check and a card mark.
    // On entry: x0 = array, x1 = element index, x2 = value to store (may be null).
    // A null value is stored directly, with no type check and no card mark. Otherwise
    // the value's class (w4) is compared with the array's component type (w3), and
    // artIsAssignableFromCode() is called when they differ. A zero result from that call
    // leads to artThrowArrayStoreException(array, value, Thread*).
    // May clobber x0, x3 and x4; the Baker read barrier paths also use IP0 and x5.
1000    cbz x2, .Laput_obj_null
1001#if defined(USE_READ_BARRIER) && !defined(USE_BAKER_READ_BARRIER)
    // Non-Baker read barrier: every reference load goes through artReadBarrierSlow().
1002    READ_BARRIER_SLOW x3, w3, x0, MIRROR_OBJECT_CLASS_OFFSET
1003    READ_BARRIER_SLOW x3, w3, x3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
1004    READ_BARRIER_SLOW x4, w4, x2, MIRROR_OBJECT_CLASS_OFFSET
1005#else  // !defined(USE_READ_BARRIER) || defined(USE_BAKER_READ_BARRIER)
1006#ifdef USE_READ_BARRIER
    // Baker read barrier: use the checking loads at .Laput_obj_gc_marking when wMR is non-zero.
1007    cbnz wMR, .Laput_obj_gc_marking
1008#endif  // USE_READ_BARRIER
1009    ldr w3, [x0, #MIRROR_OBJECT_CLASS_OFFSET]          // Heap reference = 32b; zero-extends to x3.
1010    UNPOISON_HEAP_REF w3
1011    ldr w3, [x3, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]  // Heap reference = 32b; zero-extends to x3.
1012    UNPOISON_HEAP_REF w3
1013    ldr w4, [x2, #MIRROR_OBJECT_CLASS_OFFSET]          // Heap reference = 32b; zero-extends to x4.
1014    UNPOISON_HEAP_REF w4
1015#endif  // !defined(USE_READ_BARRIER) || defined(USE_BAKER_READ_BARRIER)
1016    cmp w3, w4  // value's type == array's component type - trivial assignability
1017    bne .Laput_obj_check_assignability
1018.Laput_obj_store:
1019    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
1020    POISON_HEAP_REF w2
1021    str w2, [x3, x1, lsl #2]                           // Heap reference = 32b.
    // Card mark: store the low byte of the card table base at
    // card_table_base + (array address >> CARD_TABLE_CARD_SHIFT).
1022    ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
1023    lsr x0, x0, #CARD_TABLE_CARD_SHIFT
1024    strb w3, [x3, x0]
1025    ret
1026
1027.Laput_obj_null:
    // Null value: plain store of w2 (zero here), no poisoning and no card mark.
1028    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
1029    str w2, [x3, x1, lsl #2]                           // Heap reference = 32b.
1030    ret
1031
1032.Laput_obj_check_assignability:
1033    // Store arguments and link register
    // 32-byte frame: x0, x1 at #0/#8; x2, LR at #16/#24.
1034    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32
1035    SAVE_TWO_REGS x2, xLR, 16
1036
1037    // Call runtime code
1038    mov x0, x3              // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
1039    mov x1, x4              // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
1040    bl artIsAssignableFromCode
1041
1042    // A zero result means the value is not assignable: take the throw path.
1043    cbz x0, .Laput_obj_throw_array_store_exception
1044
1045    // Restore
    // The CFI state is remembered here and restored after `ret`, so that the unwind
    // info for the throw path below still describes the 32-byte frame.
1046    CFI_REMEMBER_STATE
1047    RESTORE_TWO_REGS x2, xLR, 16
1048    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
1049
    // Same store and card mark sequence as at .Laput_obj_store.
1050    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
1051    POISON_HEAP_REF w2
1052    str w2, [x3, x1, lsl #2]                           // Heap reference = 32b.
1053    ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
1054    lsr x0, x0, #CARD_TABLE_CARD_SHIFT
1055    strb w3, [x3, x0]
1056    ret
1057    CFI_RESTORE_STATE_AND_DEF_CFA sp, 32
1058
1059.Laput_obj_throw_array_store_exception:
    // Restore the entry values of x0-x2 and LR before building the exception frame.
1060    RESTORE_TWO_REGS x2, xLR, 16
1061    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
1062
1063#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
    // Remember the frameless CFI state for the Baker paths placed after the `brk` below.
1064    CFI_REMEMBER_STATE
1065#endif  // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
1066    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    // x0 still holds the array; it is passed as the first argument.
1067    mov x1, x2                      // Pass value.
1068    mov x2, xSELF                   // Pass Thread::Current.
1069    bl artThrowArrayStoreException  // (Object*, Object*, Thread*).
1070    brk 0                           // Unreachable.
1071
1072#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
1073    CFI_RESTORE_STATE_AND_DEF_CFA sp, 0
1074.Laput_obj_gc_marking:
    // Same three loads as the fast path above (array class, component type, value class),
    // each preceded by a lock word check that diverts to a marking slow path.
1075    BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \
1076        w3, x0, MIRROR_OBJECT_CLASS_OFFSET, .Laput_obj_mark_array_class
1077.Laput_obj_mark_array_class_continue:
1078    BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \
1079        w3, x3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, .Laput_obj_mark_array_element
1080.Laput_obj_mark_array_element_continue:
1081    BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \
1082        w4, x2, MIRROR_OBJECT_CLASS_OFFSET, .Laput_obj_mark_object_class
1083.Laput_obj_mark_object_class_continue:
1084    cmp w3, w4  // value's type == array's component type - trivial assignability
1085    bne .Laput_obj_check_assignability
1086    b   .Laput_obj_store
1087
    // Slow paths: reload the reference, call the mark entrypoint for the register that
    // holds it (x3 or x4), then rejoin the sequence above.
1088.Laput_obj_mark_array_class:
1089    BAKER_RB_LOAD_AND_MARK w3, x0, MIRROR_OBJECT_CLASS_OFFSET, art_quick_read_barrier_mark_reg03
1090    b .Laput_obj_mark_array_class_continue
1091
1092.Laput_obj_mark_array_element:
1093    BAKER_RB_LOAD_AND_MARK \
1094        w3, x3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, art_quick_read_barrier_mark_reg03
1095    b .Laput_obj_mark_array_element_continue
1096
1097.Laput_obj_mark_object_class:
1098    BAKER_RB_LOAD_AND_MARK w4, x2, MIRROR_OBJECT_CLASS_OFFSET, art_quick_read_barrier_mark_reg04
1099    b .Laput_obj_mark_object_class_continue
1100#endif  // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
1101END art_quick_aput_obj
1102
1103// Macro to facilitate adding new allocation entrypoints.
// Defines entrypoint \name, which calls \entrypoint with the caller's x0 untouched and
// xSELF in x1, then expands \return (a macro name supplied by the instantiation).
1104.macro ONE_ARG_DOWNCALL name, entrypoint, return
1105    .extern \entrypoint
1106ENTRY \name
1107    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1108    mov    x1, xSELF                  // pass Thread::Current
1109    bl     \entrypoint                // (arg0, Thread*)
1110    RESTORE_SAVE_REFS_ONLY_FRAME
1111    REFRESH_MARKING_REGISTER
1112    \return
1113END \name
1114.endm
1115
1116// Macro to facilitate adding new allocation entrypoints.
// Defines entrypoint \name, which calls \entrypoint with the caller's x0-x1 untouched and
// xSELF in x2, then expands \return (a macro name supplied by the instantiation).
1117.macro TWO_ARG_DOWNCALL name, entrypoint, return
1118    .extern \entrypoint
1119ENTRY \name
1120    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1121    mov    x2, xSELF                  // pass Thread::Current
1122    bl     \entrypoint                // (arg0, arg1, Thread*)
1123    RESTORE_SAVE_REFS_ONLY_FRAME
1124    REFRESH_MARKING_REGISTER
1125    \return
1126END \name
1127.endm
1128
1129// Macro to facilitate adding new allocation entrypoints.
// Defines entrypoint \name, which calls \entrypoint with the caller's x0-x2 untouched and
// xSELF in x3, then expands \return (a macro name supplied by the instantiation).
1130.macro THREE_ARG_DOWNCALL name, entrypoint, return
1131    .extern \entrypoint
1132ENTRY \name
1133    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1134    mov    x3, xSELF                  // pass Thread::Current
1135    bl     \entrypoint                // (arg0, arg1, arg2, Thread*)
1136    RESTORE_SAVE_REFS_ONLY_FRAME
1137    REFRESH_MARKING_REGISTER
1138    \return
1139END \name
1140.endm
1141
1142// Macro to facilitate adding new allocation entrypoints.
// Defines entrypoint \name, which calls \entrypoint with the caller's x0-x3 untouched and
// xSELF in x4, then expands \return (a macro name supplied by the instantiation).
1143.macro FOUR_ARG_DOWNCALL name, entrypoint, return
1144    .extern \entrypoint
1145ENTRY \name
1146    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1147    mov    x4, xSELF                  // pass Thread::Current
1148    bl     \entrypoint                // (arg0, arg1, arg2, arg3, Thread*)
1149    RESTORE_SAVE_REFS_ONLY_FRAME
1150    REFRESH_MARKING_REGISTER
1151    \return
1152END \name
1153.endm
1154
1155// Macros taking opportunity of code similarities for downcalls.
// Each of the three *_ARG_REF_DOWNCALL macros defines entrypoint \name, which calls
// \entrypoint with the caller's argument registers untouched and xSELF appended as the
// last argument (x1, x2 or x3 respectively), then expands \return. Their bodies are
// instruction-for-instruction the same as the corresponding *_ARG_DOWNCALL macros.
1156.macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
1157    .extern \entrypoint
1158ENTRY \name
1159    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1160    mov    x1, xSELF                  // pass Thread::Current
1161    bl     \entrypoint                // (uint32_t type_idx, Thread*)
1162    RESTORE_SAVE_REFS_ONLY_FRAME
1163    REFRESH_MARKING_REGISTER
1164    \return
1165END \name
1166.endm
1167
1168.macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
1169    .extern \entrypoint
1170ENTRY \name
1171    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1172    mov    x2, xSELF                  // pass Thread::Current
1173    bl     \entrypoint                // (arg0, arg1, Thread*)
1174    RESTORE_SAVE_REFS_ONLY_FRAME
1175    REFRESH_MARKING_REGISTER
1176    \return
1177END \name
1178.endm
1179
1180.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
1181    .extern \entrypoint
1182ENTRY \name
1183    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1184    mov    x3, xSELF                  // pass Thread::Current
1185    bl     \entrypoint                // (arg0, arg1, arg2, Thread*)
1186    RESTORE_SAVE_REFS_ONLY_FRAME
1187    REFRESH_MARKING_REGISTER
1188    \return
1189END \name
1190.endm
1191
1192    /*
1193     * Macro for resolution and initialization of indexed DEX file
1194     * constants such as classes and strings.
     *
     * Defines entrypoint \name, which sets up a save-everything frame using
     * \runtime_method_offset (default RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET) and calls
     * \entrypoint with the caller's x0 untouched and xSELF in x1. A zero w0 result
     * delivers the pending exception; any other result goes through
     * DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_X0 (defined outside this chunk).
1195     */
1196.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL \
1197        name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
1198    .extern \entrypoint
1199ENTRY \name
1200    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset       // save everything for stack crawl
1201    mov   x1, xSELF                   // pass Thread::Current
1202    bl    \entrypoint                 // (int32_t index, Thread* self)
1203    cbz   w0, 1f                      // If result is null, deliver the pending exception.
1204    DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_X0 x1, /* is_ref= */ 1
12051:
    // The save-everything frame is still in place here, hence the FRAME_READY variant.
1206    DELIVER_PENDING_EXCEPTION_FRAME_READY
1207END \name
1208.endm
1209
1210.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
    // Same as ONE_ARG_SAVE_EVERYTHING_DOWNCALL, but the frame is set up with
    // RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET instead of the default offset.
1211    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \
1212            \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
1213.endm
1214
1215.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DEOPT_OR_DELIVER
    // Return sequence for downcalls whose result is a reference in w0: a non-zero w0
    // expands DEOPT_OR_RETURN (defined outside this chunk), a zero w0 delivers the
    // pending exception. Uses the local numeric label `1`.
1216    cbz w0, 1f                       // zero result: skip to exception delivery
1217    DEOPT_OR_RETURN x1, /*is_ref=*/1 // check for deopt or return
12181:
1219    DELIVER_PENDING_EXCEPTION
1220.endm
1221
1222
1223    /*
1224     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
1225     * failure.
1226     */
1227TWO_ARG_REF_DOWNCALL \
1228        art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1229
1230    /*
1231     * Entry from managed code when static storage is uninitialized; this stub will run the class
1232     * initializer and deliver the exception on error. On success the static storage base is
1233     * returned.
1234     */
1235ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT \
1236        art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
// Resolution entrypoints. The two *_FOR_CLINIT instantiations (static storage and
// resolve_type) use the for-clinit runtime method offset; the rest use the default one.
1237ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
1238ONE_ARG_SAVE_EVERYTHING_DOWNCALL \
1239        art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
1240ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
1241ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
1242ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode
1243
1244// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
1245// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.
1246
// Static getters: one argument register plus Thread* (ONE_ARG_REF_DOWNCALL).
1247ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, \
1248                     artGetBooleanStaticFromCompiledCode, \
1249                     RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
1250ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, \
1251                     artGetByteStaticFromCompiledCode, \
1252                     RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
1253ONE_ARG_REF_DOWNCALL art_quick_get_char_static, \
1254                     artGetCharStaticFromCompiledCode, \
1255                     RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
1256ONE_ARG_REF_DOWNCALL art_quick_get_short_static, \
1257                     artGetShortStaticFromCompiledCode, \
1258                     RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
1259ONE_ARG_REF_DOWNCALL art_quick_get32_static, \
1260                     artGet32StaticFromCompiledCode, \
1261                     RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
1262ONE_ARG_REF_DOWNCALL art_quick_get64_static, \
1263                     artGet64StaticFromCompiledCode, \
1264                     RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
1265ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, \
1266                     artGetObjStaticFromCompiledCode, \
1267                     RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
1268
// Instance getters: two argument registers plus Thread* (TWO_ARG_REF_DOWNCALL).
1269TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, \
1270                     artGetBooleanInstanceFromCompiledCode, \
1271                     RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
1272TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, \
1273                     artGetByteInstanceFromCompiledCode, \
1274                     RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
1275TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, \
1276                     artGetCharInstanceFromCompiledCode, \
1277                     RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
1278TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, \
1279                     artGetShortInstanceFromCompiledCode, \
1280                     RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
1281TWO_ARG_REF_DOWNCALL art_quick_get32_instance, \
1282                     artGet32InstanceFromCompiledCode, \
1283                     RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
1284TWO_ARG_REF_DOWNCALL art_quick_get64_instance, \
1285                     artGet64InstanceFromCompiledCode, \
1286                     RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
1287TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, \
1288                     artGetObjInstanceFromCompiledCode, \
1289                     RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
1290
// Static setters: two argument registers plus Thread*; the w0 result goes through
// RETURN_IF_W0_IS_ZERO_OR_DELIVER.
1291TWO_ARG_REF_DOWNCALL \
1292    art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1293TWO_ARG_REF_DOWNCALL \
1294    art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1295TWO_ARG_REF_DOWNCALL \
1296    art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1297TWO_ARG_REF_DOWNCALL \
1298    art_quick_set64_static, artSet64StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1299TWO_ARG_REF_DOWNCALL \
1300    art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1301
// Instance setters: three argument registers plus Thread*; the w0 result goes through
// RETURN_IF_W0_IS_ZERO_OR_DELIVER.
1302THREE_ARG_REF_DOWNCALL \
1303    art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1304THREE_ARG_REF_DOWNCALL \
1305    art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1306THREE_ARG_REF_DOWNCALL \
1307    art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1308THREE_ARG_REF_DOWNCALL \
1309    art_quick_set64_instance, artSet64InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1310THREE_ARG_REF_DOWNCALL \
1311    art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1312
1313// Generate the allocation entrypoints for each allocator.
1314GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
1315// The generators left commented out below are for allocators that have arm64 specific asm.
// The same set is disabled for both the region TLAB and the TLAB allocator.
1316// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
1317// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
1318GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
1319GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
1320// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
1321// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
1322// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
1323// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
1324// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
1325GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
1326GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
1327GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
1328
1329// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
1330// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
1331GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
1332GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
1333// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
1334// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
1335// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
1336// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
1337// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
1338GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
1339GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
1340GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
1341
1342// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
1343// If isInitialized=0 the compiler can only assume it's been at least resolved.
// Note: the macro body below never references \isInitialized; both instantiations
// therefore emit the same instruction sequence and differ only in \c_name and \cxx_name.
//
// Defines entrypoint \c_name. On entry x0 holds the class. The fast path pops the head
// of a thread-local rosalloc run's free list and returns the new object in x0. The slow
// path calls \cxx_name(class, Thread*) with x0 still holding the unmodified class.
// The fast path uses x1, x3 and x4 as scratch registers.
1344.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized
1345ENTRY \c_name
1346    // Fast path rosalloc allocation.
1347    // x0: type, xSELF(x19): Thread::Current
1348    // x1-x7: free.
1349    ldr    x3, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
1350                                                              // allocation stack has room.
1351                                                              // ldp won't work due to large offset.
1352    ldr    x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
1353    cmp    x3, x4
1354    bhs    .Lslow_path\c_name
1355    ldr    w3, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x3)
1356    cmp    x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
1357                                                              // local allocation.
1358    // If the class is not yet visibly initialized, or it is finalizable,
1359    // the object size will be very large to force the branch below to be taken.
1360    //
1361    // See Class::SetStatus() in class.cc for more details.
1362    bhs    .Lslow_path\c_name
1363                                                              // Compute the rosalloc bracket index
1364                                                              // from the size. Since the size is
1365                                                              // already aligned we can combine the
1366                                                              // two shifts together.
1367    add    x4, xSELF, x3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
1368                                                              // Subtract pointer size since there
1369                                                              // are no runs for 0 byte allocations
1370                                                              // and the size is already aligned.
1371    ldr    x4, [x4, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)]
1372                                                              // Load the free list head (x3). This
1373                                                              // will be the return val.
1374    ldr    x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
1375    cbz    x3, .Lslow_path\c_name
1376    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
1377    ldr    x1, [x3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
1378                                                              // and update the list head with the
1379                                                              // next pointer.
1380    str    x1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
1381                                                              // Store the class pointer in the
1382                                                              // header. This also overwrites the
1383                                                              // next pointer. The offsets are
1384                                                              // asserted to match.
1385
1386#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
1387#error "Class pointer needs to overwrite next pointer."
1388#endif
1389    POISON_HEAP_REF w0
1390    str    w0, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
1391                                                              // Push the new object onto the thread
1392                                                              // local allocation stack and
1393                                                              // increment the thread local
1394                                                              // allocation stack top.
1395    ldr    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
1396    str    w3, [x1], #COMPRESSED_REFERENCE_SIZE               // (Increment x1 as a side effect.)
1397    str    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
1398                                                              // Decrement the size of the free list
1399
1400    // After this "STR" the object is published to the thread local allocation stack,
1401    // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view.
1402    // It is not yet visible to the running (user) compiled code until after the return.
1403    //
1404    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
1405    // the state of the allocation stack slot. It can be a pointer to one of:
1406    // 0) Null entry, because the stack was bumped but the new pointer wasn't written yet.
1407    //       (The stack initial state is "null" pointers).
1408    // 1) A partially valid object, with an invalid class pointer to the next free rosalloc slot.
1409    // 2) A fully valid object, with a valid class pointer pointing to a real class.
1410    // Other states are not allowed.
1411    //
1412    // An object that is invalid only temporarily, and will eventually become valid.
1413    // The internal runtime code simply checks if the object is not null or is partial and then
1414    // ignores it.
1415    //
1416    // (Note: The actual check is done by seeing if a non-null object has a class pointer pointing
1417    // to ClassClass, and that the ClassClass's class pointer is self-cyclic. A rosalloc free slot
1418    // "next" pointer is not-cyclic.)
1419    //
1420    // See also b/28790624 for a listing of CLs dealing with this race.
    // The free list size is a 32-bit field: it is loaded into w1 (zero-extended), decremented
    // as x1, and only the low 32 bits are stored back.
1421    ldr    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
1422    sub    x1, x1, #1
1423                                                              // TODO: consider combining this store
1424                                                              // and the list head store above using
1425                                                              // strd.
1426    str    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
1427
1428    mov    x0, x3                                             // Set the return value and return.
1429    // No barrier. The class is already observably initialized (otherwise the fast
1430    // path size check above would fail) and new-instance allocations are protected
1431    // from publishing by the compiler which inserts its own StoreStore barrier.
1432    ret
1433.Lslow_path\c_name:
    // Reached only before the "point of no slow path", so x0 still holds the class.
1434    SETUP_SAVE_REFS_ONLY_FRAME                      // save callee saves in case of GC
1435    mov    x1, xSELF                                // pass Thread::Current
1436    bl     \cxx_name
1437    RESTORE_SAVE_REFS_ONLY_FRAME
1438    REFRESH_MARKING_REGISTER
1439    RETURN_IF_RESULT_IS_NON_ZERO_OR_DEOPT_OR_DELIVER
1440END \c_name
1441.endm
1442
// Instantiate the rosalloc object allocation entrypoints for resolved and initialized classes.
1443ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, \
1444                                artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0
1445ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, \
1446                                artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1
1447
1448// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
1449// If isInitialized=0 the compiler can only assume it's been at least resolved.
// Note: the macro body below never references \isInitialized.
//
// Bump-pointer allocation from the thread-local buffer. On entry x0 holds the class.
// If thread_local_pos + object size is above thread_local_end (unsigned compare), control
// goes to \slowPathLabel with x0 unmodified. Otherwise the macro advances
// thread_local_pos, increments thread_local_objects, stores the class pointer into the
// new object, and returns to the caller (`ret`) with the object address in x0.
// Uses x4-x7 as scratch registers.
1450.macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel isInitialized
1451    ldr    x4, [xSELF, #THREAD_LOCAL_POS_OFFSET]
1452    ldr    x5, [xSELF, #THREAD_LOCAL_END_OFFSET]
1453    ldr    w7, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x7).
1454    add    x6, x4, x7                                         // Add object size to tlab pos.
1455    cmp    x6, x5                                             // Check if it fits, overflow works
1456                                                              // since the tlab pos and end are 32
1457                                                              // bit values.
1458
1459    // If the class is not yet visibly initialized, or it is finalizable,
1460    // the object size will be very large to force the branch below to be taken.
1461    //
1462    // See Class::SetStatus() in class.cc for more details.
1463    bhi    \slowPathLabel
1464    str    x6, [xSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
1465    ldr    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]          // Increment thread_local_objects.
1466    add    x5, x5, #1
1467    str    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
1468    POISON_HEAP_REF w0
1469    str    w0, [x4, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
1470    mov    x0, x4                                             // Return the old tlab pos as the new object.
1471    // No barrier. The class is already observably initialized (otherwise the fast
1472    // path size check above would fail) and new-instance allocations are protected
1473    // from publishing by the compiler which inserts its own StoreStore barrier.
1474    ret
1475.endm
1476
1477// The common code for art_quick_alloc_object_*_region_tlab and art_quick_alloc_object_*_tlab.
1478// Currently the implementation ignores isInitialized. TODO(b/172087402): clean this up.
1479// Caller must execute a constructor fence after this.
//
// Parameters:
//   name          - symbol of the generated entrypoint.
//   entrypoint    - runtime function called on the slow path as (mirror::Class*, Thread*).
//   isInitialized - forwarded to ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED.
1480.macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized
1481ENTRY \name
1482    // Fast path region tlab allocation.
1483    // x0: type, xSELF(x19): Thread::Current
1484    // x1-x7: free.
1485    ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED .Lslow_path\name, \isInitialized
    // The fast path above ends in `ret`, so the code below is reached only through its
    // branch to the slow path label; x0 still holds the class at that point.
1486.Lslow_path\name:
1487    SETUP_SAVE_REFS_ONLY_FRAME                 // Save callee saves in case of GC.
1488    mov    x1, xSELF                           // Pass Thread::Current.
1489    bl     \entrypoint                         // (mirror::Class*, Thread*)
1490    RESTORE_SAVE_REFS_ONLY_FRAME
1491    REFRESH_MARKING_REGISTER
1492    RETURN_IF_RESULT_IS_NON_ZERO_OR_DEOPT_OR_DELIVER
1493END \name
1494.endm
1495
// Instantiate the object-allocation entrypoints for the region-TLAB and TLAB variants, each in a
// "resolved" (isInitialized = 0) and an "initialized" (isInitialized = 1) flavour.
1496GENERATE_ALLOC_OBJECT_RESOLVED_TLAB \
1497    art_quick_alloc_object_resolved_region_tlab, \
1498    artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0
1499GENERATE_ALLOC_OBJECT_RESOLVED_TLAB \
1500    art_quick_alloc_object_initialized_region_tlab, \
1501    artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1
1502GENERATE_ALLOC_OBJECT_RESOLVED_TLAB \
1503    art_quick_alloc_object_resolved_tlab, \
1504    artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0
1505GENERATE_ALLOC_OBJECT_RESOLVED_TLAB \
1506    art_quick_alloc_object_initialized_tlab, \
1507    artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1
1508
// Fast-path array allocation from the thread-local allocation buffer, given a precomputed size.
//   In:       \wClass = array class, \wCount = element count,
//             \xTemp1 = data size + array data offset + alignment mask, not yet rounded down
//                       (the value the COMPUTE_ARRAY_SIZE_* macros leave in \xTemp1),
//             xSELF   = current Thread.
//   Out:      On success, returns to the caller (`ret`) with x0 = address of the new array,
//             whose class and length fields have been stored.
//             Otherwise branches to \slowPathLabel; no store has been performed and x0,
//             \xClass and \xCount have not been written by this macro at that point.
//   Clobbers: \xTemp0, \xTemp1, \xTemp2 and the condition flags.
1509.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE \
1510    slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
1511    and    \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED64 // Apply alignment mask
1512                                                              // (addr + 7) & ~7. The mask must
1513                                                              // be 64 bits to keep high bits in
1514                                                              // case of overflow.
1515    // Negative sized arrays are handled here since xCount holds a zero extended 32 bit value.
1516    // Negative ints become large 64 bit unsigned ints which will always be larger than max signed
1517    // 32 bit int. Since the max shift for arrays is 3, it can not become a negative 64 bit int.
1518    cmp    \xTemp1, #MIN_LARGE_OBJECT_THRESHOLD               // Possibly a large object, go slow
1519    bhs    \slowPathLabel                                     // path.
1520
1521    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Check tlab for space, note that
1522                                                              // we use (end - begin) to handle
1523                                                              // negative size arrays. It is
1524                                                              // assumed that a negative size will
1525                                                              // always be greater unsigned than
1526                                                              // region size.
1527    ldr    \xTemp2, [xSELF, #THREAD_LOCAL_END_OFFSET]
1528    sub    \xTemp2, \xTemp2, \xTemp0                          // xTemp2 = bytes left in the tlab.
1529    cmp    \xTemp1, \xTemp2
1530
1531    // The array class is always initialized here. Unlike new-instance,
1532    // this does not act as a double test.
1533    bhi    \slowPathLabel
1534    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
1535                                                              // Move old thread_local_pos to x0
1536                                                              // for the return value.
1537    mov    x0, \xTemp0
1538    add    \xTemp0, \xTemp0, \xTemp1
1539    str    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Store new thread_local_pos.
1540    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]     // Increment thread_local_objects.
1541    add    \xTemp0, \xTemp0, #1
1542    str    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
1543    POISON_HEAP_REF \wClass
1544    str    \wClass, [x0, #MIRROR_OBJECT_CLASS_OFFSET]         // Store the class pointer.
1545    str    \wCount, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]         // Store the array length.
1546// new-array is special. The class is loaded and immediately goes to the Initialized state
1547// before it is published. Therefore the only fence needed is for the publication of the object.
1548// See ClassLinker::CreateArrayClass() for more details.
1549
1550// For publication of the new array, we don't need a 'dmb ishst' here.
1551// The compiler generates 'dmb ishst' for all new-array insts.
1552    ret
1553.endm
1554
1555// Caller must execute a constructor fence after this.
//
// Generates an array-allocation entrypoint: a TLAB fast path followed by a runtime slow path.
// Parameters:
//   name       - symbol of the generated entrypoint.
//   entrypoint - runtime function called on the slow path as (klass, component_count, Thread*).
//   size_setup - macro that computes the allocation size; it is invoked with the same register
//                list that is then handed to ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE.
1556.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
1557ENTRY \name
1558    // Fast path array allocation for region tlab allocation.
1559    // x0: mirror::Class* type
1560    // x1: int32_t component_count
1561    // x2-x7: free.
1562    mov    x3, x0                     // Work on a copy of the class so that x0 is still intact
                                      // if the fast path branches to the slow path.
1563    \size_setup x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
1564    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE \
1565        .Lslow_path\name, x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
1566.Lslow_path\name:
1567    // x0: mirror::Class* klass
1568    // x1: int32_t component_count
1569    // x2: Thread* self
1570    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1571    mov    x2, xSELF                  // pass Thread::Current
1572    bl     \entrypoint
1573    RESTORE_SAVE_REFS_ONLY_FRAME
1574    REFRESH_MARKING_REGISTER
1575    RETURN_IF_RESULT_IS_NON_ZERO_OR_DEOPT_OR_DELIVER
1576END \name
1577.endm
1578
// Size setup for an array whose component size is only known at run time.
// Loads the component size shift from the component type of \xClass and leaves
//   \xTemp1 = (\xCount << shift) + array data offset + alignment mask (+ 4 when shift == 3),
// not yet rounded down to the object alignment.
// Clobbers \xTemp0 and \xTemp1; the remaining parameters are not referenced.
1579.macro COMPUTE_ARRAY_SIZE_UNKNOWN \
1580        xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
1581    // Array classes are never finalizable or uninitialized, no need to check.
1582    ldr    \wTemp0, [\xClass, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET] // Load component type
1583    UNPOISON_HEAP_REF \wTemp0
1584    ldr    \wTemp0, [\xTemp0, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
1585    lsr    \xTemp0, \xTemp0, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT // Component size shift is in high 16
1586                                                              // bits.
1587                                                              // xCount is holding a 32 bit value,
1588                                                              // it can not overflow.
1589    lsl    \xTemp1, \xCount, \xTemp0                          // Calculate data size
1590    // Add array data offset and alignment.
1591    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1592#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
1593#error Long array data offset must be 4 greater than int array data offset.
1594#endif
1595
1596    add    \xTemp0, \xTemp0, #1                               // Add 4 to the length only if the
1597                                                              // component size shift is 3
1598                                                              // (for 64 bit alignment).
1599    and    \xTemp0, \xTemp0, #4                               // (shift + 1) & 4 is 4 for shift 3
                                                              // and 0 for shifts 0 to 2.
1600    add    \xTemp1, \xTemp1, \xTemp0
1601.endm
1602
// Size setup with no shift applied to the count (one byte per element):
//   \xTemp1 = \xCount + int-array data offset + alignment mask,
// not yet rounded down to the object alignment. Only \xCount and \xTemp1 are referenced.
1603.macro COMPUTE_ARRAY_SIZE_8 \
1604        xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
1605    // Add array data offset and alignment.
1606    add    \xTemp1, \xCount, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1607.endm
1608
// Size setup for two bytes per element:
//   \xTemp1 = (\xCount << 1) + int-array data offset + alignment mask,
// not yet rounded down to the object alignment. Only \xCount and \xTemp1 are referenced.
1609.macro COMPUTE_ARRAY_SIZE_16 \
1610        xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
1611    lsl    \xTemp1, \xCount, #1       // Data size in bytes.
1612    // Add array data offset and alignment.
1613    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1614.endm
1615
// Size setup for four bytes per element:
//   \xTemp1 = (\xCount << 2) + int-array data offset + alignment mask,
// not yet rounded down to the object alignment. Only \xCount and \xTemp1 are referenced.
1616.macro COMPUTE_ARRAY_SIZE_32 \
1617        xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
1618    lsl    \xTemp1, \xCount, #2       // Data size in bytes.
1619    // Add array data offset and alignment.
1620    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1621.endm
1622
// Size setup for eight bytes per element:
//   \xTemp1 = (\xCount << 3) + wide-array data offset + alignment mask,
// not yet rounded down to the object alignment. Unlike the narrower variants this one uses
// MIRROR_WIDE_ARRAY_DATA_OFFSET. Only \xCount and \xTemp1 are referenced.
1623.macro COMPUTE_ARRAY_SIZE_64 \
1624        xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
1625    lsl    \xTemp1, \xCount, #3       // Data size in bytes.
1626    // Add array data offset and alignment.
1627    add    \xTemp1, \xTemp1, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1628.endm
1629
// Instantiate the array-allocation entrypoints: for each of the region-TLAB and TLAB variants,
// one entrypoint with run-time size computation and one per fixed component size (8/16/32/64).
// All variants of the same kind share the same runtime slow-path function.
1630// TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm64, remove
1631// the entrypoint once all backends have been updated to use the size variants.
1632GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, \
1633                          artAllocArrayFromCodeResolvedRegionTLAB, \
1634                          COMPUTE_ARRAY_SIZE_UNKNOWN
1635GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, \
1636                          artAllocArrayFromCodeResolvedRegionTLAB, \
1637                          COMPUTE_ARRAY_SIZE_8
1638GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, \
1639                          artAllocArrayFromCodeResolvedRegionTLAB, \
1640                          COMPUTE_ARRAY_SIZE_16
1641GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, \
1642                          artAllocArrayFromCodeResolvedRegionTLAB, \
1643                          COMPUTE_ARRAY_SIZE_32
1644GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, \
1645                          artAllocArrayFromCodeResolvedRegionTLAB, \
1646                          COMPUTE_ARRAY_SIZE_64
1647GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, \
1648                          artAllocArrayFromCodeResolvedTLAB, \
1649                          COMPUTE_ARRAY_SIZE_UNKNOWN
1650GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, \
1651                          artAllocArrayFromCodeResolvedTLAB, \
1652                          COMPUTE_ARRAY_SIZE_8
1653GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, \
1654                          artAllocArrayFromCodeResolvedTLAB, \
1655                          COMPUTE_ARRAY_SIZE_16
1656GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, \
1657                          artAllocArrayFromCodeResolvedTLAB, \
1658                          COMPUTE_ARRAY_SIZE_32
1659GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, \
1660                          artAllocArrayFromCodeResolvedTLAB, \
1661                          COMPUTE_ARRAY_SIZE_64
1662
1663    /*
1664     * Called by managed code when the thread has been asked to suspend.
     *
     * Sets up a save-everything frame, calls artTestSuspendFromCode(Thread*), then restores
     * the frame, runs REFRESH_MARKING_REGISTER and REFRESH_SUSPEND_CHECK_REGISTER and
     * returns to the caller with `ret`.
1665     */
1666    .extern artTestSuspendFromCode
1667ENTRY art_quick_test_suspend
1668                                        // Save callee saves for stack crawl.
1669    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET
1670    mov    x0, xSELF                    // Pass Thread::Current.
1671    bl     artTestSuspendFromCode       // (Thread*)
1672    RESTORE_SAVE_EVERYTHING_FRAME
1673    REFRESH_MARKING_REGISTER
1674    REFRESH_SUSPEND_CHECK_REGISTER
1675    ret
1676END art_quick_test_suspend
1677
1678    /*
1679     * Redirection point from implicit suspend check fault handler.
     *
     * Same sequence as art_quick_test_suspend, except that it calls
     * artImplicitSuspendFromCode(Thread*) and resumes with `br lr` instead of `ret`.
1680     */
1681    .extern artImplicitSuspendFromCode
1682ENTRY art_quick_implicit_suspend
1683                                        // Save callee saves for stack crawl.
1684    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET
1685    mov    x0, xSELF                    // Pass Thread::Current.
1686    bl     artImplicitSuspendFromCode   // (Thread*)
1687    RESTORE_SAVE_EVERYTHING_FRAME
1688    REFRESH_MARKING_REGISTER
1689    REFRESH_SUSPEND_CHECK_REGISTER
1690    br     lr  // Do not use RET as we do not enter the entrypoint with "BL".
1691END art_quick_implicit_suspend
1692
1693     /*
1694     * Called by managed code that is attempting to call a method on a proxy class. On entry
1695     * x0 holds the proxy method and x1 holds the receiver; The frame size of the invoked proxy
1696     * method agrees with a ref and args callee save frame.
     *
     * On success the result in x0 is also copied to d0 before returning. If an exception is
     * pending after the call, the frame is popped and the exception is delivered instead.
1697     */
1698     .extern artQuickProxyInvokeHandler
1699ENTRY art_quick_proxy_invoke_handler
1700    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
1701    mov     x2, xSELF                   // pass Thread::Current
1702    mov     x3, sp                      // pass SP
1703    bl      artQuickProxyInvokeHandler  // (Method* proxy method, receiver, Thread*, SP)
1704    ldr     x2, [xSELF, THREAD_EXCEPTION_OFFSET]  // x2 = pending exception, if any
1705    cbnz    x2, .Lexception_in_proxy    // success if no exception is pending
1706    CFI_REMEMBER_STATE
1707    RESTORE_SAVE_REFS_AND_ARGS_FRAME    // Restore frame
1708    REFRESH_MARKING_REGISTER
1709    fmov    d0, x0                      // Store result in d0 in case it was float or double
1710    ret                                 // return on success
1711.Lexception_in_proxy:
    // The frame is still set up here: reinstate the unwind state remembered above.
1712    CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_REFS_AND_ARGS
1713    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1714    DELIVER_PENDING_EXCEPTION
1715END art_quick_proxy_invoke_handler
1716
1717    /*
1718     * Called to resolve an imt conflict.
1719     * x0 is the conflict ArtMethod.
1720     * xIP1 is a hidden argument that holds the target interface method.
1721     *
1722     * Note that this stub writes to xIP0, xIP1, and x0.
     *
     * As walked by the code below, the ImtConflictTable is a sequence of two-pointer
     * entries: the first pointer is compared against the interface method and the second
     * is the method to invoke on a match. A null first pointer ends the table.
1723     */
1724ENTRY art_quick_imt_conflict_trampoline
1725    ldr xIP0, [x0, #ART_METHOD_JNI_OFFSET_64]  // Load ImtConflictTable
1726    ldr x0, [xIP0]  // Load first entry in ImtConflictTable.
1727.Limt_table_iterate:
1728    cmp x0, xIP1
1729    // Branch if found. Benchmarks have shown doing a branch here is better.
1730    beq .Limt_table_found
1731    // If the entry is null, the interface method is not in the ImtConflictTable.
1732    cbz x0, .Lconflict_trampoline
1733    // Iterate over the entries of the ImtConflictTable.
1734    ldr x0, [xIP0, #(2 * __SIZEOF_POINTER__)]!  // Pre-index: advance xIP0 to the next entry.
1735    b .Limt_table_iterate
1736.Limt_table_found:
1737    // We successfully hit an entry in the table. Load the target method
1738    // and jump to it.
1739    ldr x0, [xIP0, #__SIZEOF_POINTER__]         // Second pointer of the matching entry.
1740    ldr xIP0, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
1741    br xIP0
1742.Lconflict_trampoline:
1743    // Call the runtime stub to populate the ImtConflictTable and jump to the
1744    // resolved method.
1745    mov x0, xIP1  // Load interface method
1746    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
1747END art_quick_imt_conflict_trampoline
1748
    /*
     * Calls artQuickResolutionTrampoline(called, receiver, Thread*, SP) with the incoming
     * x0/x1 passed through. If it returns a non-null code pointer, reloads x0 from the bottom
     * of the frame, restores the frame and branches to that code. If it returns null, restores
     * the frame and delivers the pending exception.
     */
1749ENTRY art_quick_resolution_trampoline
1750    SETUP_SAVE_REFS_AND_ARGS_FRAME
1751    mov x2, xSELF           // Pass Thread::Current.
1752    mov x3, sp              // Pass SP.
1753    bl artQuickResolutionTrampoline  // (called, receiver, Thread*, SP)
1754    cbz x0, 1f              // Null code pointer: take the exception path.
1755    CFI_REMEMBER_STATE
1756    mov xIP0, x0            // Remember returned code pointer in xIP0.
1757    ldr x0, [sp, #0]        // artQuickResolutionTrampoline puts called method in *SP.
1758    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1759    REFRESH_MARKING_REGISTER
1760    br xIP0
17611:
    // The frame is still set up here: reinstate the unwind state remembered above.
1762    CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_REFS_AND_ARGS
1763    RESTORE_SAVE_REFS_AND_ARGS_FRAME
1764    DELIVER_PENDING_EXCEPTION
1765END art_quick_resolution_trampoline
1766
1767/*
1768 * Generic JNI frame layout:
1769 *
1770 * #-------------------#
1771 * |                   |
1772 * | caller method...  |
1773 * #-------------------#    <--- SP on entry
1774 * | Return X30/LR     |
1775 * | X29/FP            |    callee save
1776 * | X28               |    callee save
1777 * | X27               |    callee save
1778 * | X26               |    callee save
1779 * | X25               |    callee save
1780 * | X24               |    callee save
1781 * | X23               |    callee save
1782 * | X22               |    callee save
1783 * | X21               |    callee save
1784 * | X20               |    callee save
1785 * | X7                |    arg7
1786 * | X6                |    arg6
1787 * | X5                |    arg5
1788 * | X4                |    arg4
1789 * | X3                |    arg3
1790 * | X2                |    arg2
1791 * | X1                |    arg1
1792 * | D7                |    float arg 8
1793 * | D6                |    float arg 7
1794 * | D5                |    float arg 6
1795 * | D4                |    float arg 5
1796 * | D3                |    float arg 4
1797 * | D2                |    float arg 3
1798 * | D1                |    float arg 2
1799 * | D0                |    float arg 1
1800 * | padding           | // 8B
1801 * | Method*           | <- X0 (Managed frame similar to SaveRefsAndArgs.)
1802 * #-------------------#
1803 * | local ref cookie  | // 4B
1804 * | padding           | // 0B or 4B to align stack args on 8B address
1805 * #-------------------#
1806 * | JNI Stack Args    | // Empty if all args fit into registers x0-x7, d0-d7.
1807 * #-------------------#    <--- SP on native call (1)
1808 * | Free scratch      |
1809 * #-------------------#
1810 * | SP for JNI call   | // Pointer to (1).
1811 * #-------------------#
1812 * | Hidden arg        | // For @CriticalNative
1813 * #-------------------#
1814 * |                   |
1815 * | Stack for Regs    |    The trampoline assembly will pop these values
1816 * |                   |    into registers for native call
1817 * #-------------------#
1818 */
1819    /*
1820     * Called to do a generic JNI down-call
     *
     * Outline: build the managed frame, reserve a scratch area below it, let
     * artQuickGenericJniTrampoline fill that area and return the native code pointer, load
     * the argument registers from the area, call the native code, then hand the result to
     * artQuickGenericJniEndTrampoline and tear the frames down. x28 keeps the managed-frame
     * SP throughout so the CFI can stay static while sp moves.
1821     */
1822    .extern artQuickGenericJniTrampoline
1823ENTRY art_quick_generic_jni_trampoline
1824    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
1825
1826    // Save SP, so we can have static CFI info.
1827    mov x28, sp
1828    .cfi_def_cfa_register x28
1829
1830    mov xIP0, #GENERIC_JNI_TRAMPOLINE_RESERVED_AREA
1831    sub sp, sp, xIP0
1832
1833    // prepare for artQuickGenericJniTrampoline call
1834    // (Thread*, managed_sp, reserved_area)
1835    //    x0         x1            x2   <= C calling convention
1836    //  xSELF       x28            sp   <= where they are
1837
1838    mov x0, xSELF   // Thread*
1839    mov x1, x28     // SP for the managed frame.
1840    mov x2, sp      // reserved area for arguments and other saved data (up to managed frame)
1841    bl artQuickGenericJniTrampoline  // (Thread*, managed_sp, reserved_area)
1842
1843    // The C call will have registered the complete save-frame on success.
1844    // The result of the call is:
1845    //     x0: pointer to native code, 0 on error.
1846    //     The bottom of the reserved area contains values for arg registers,
1847    //     hidden arg register and SP for out args for the call.
1848
1849    // Check for error (class init check or locking for synchronized native method can throw).
1850    cbz x0, .Lexception_in_native
1851
1852    // Save the code pointer
1853    mov xIP0, x0
1854
1855    // Load parameters from frame into registers.
1856    ldp x0, x1, [sp]
1857    ldp x2, x3, [sp, #16]
1858    ldp x4, x5, [sp, #32]
1859    ldp x6, x7, [sp, #48]
1860
1861    ldp d0, d1, [sp, #64]
1862    ldp d2, d3, [sp, #80]
1863    ldp d4, d5, [sp, #96]
1864    ldp d6, d7, [sp, #112]
1865
1866    // Load hidden arg (x15) for @CriticalNative and SP for out args.
1867    ldp x15, xIP1, [sp, #128]
1868
1869    // Apply the new SP for out args, releasing unneeded reserved area.
1870    mov sp, xIP1
1871
1872    blr xIP0        // native call.
1873
1874    // result sign extension is handled in C code
1875    // prepare for artQuickGenericJniEndTrampoline call
1876    // (Thread*, result, result_f)
1877    //    x0       x1       x2        <= C calling convention
1878    mov x1, x0      // Result (from saved).
1879    mov x0, xSELF   // Thread register.
1880    fmov x2, d0     // d0 will contain floating point result, but needs to go into x2
1881
1882    bl artQuickGenericJniEndTrampoline
1883
1884    // Pending exceptions possible.
1885    ldr x2, [xSELF, THREAD_EXCEPTION_OFFSET]
1886    cbnz x2, .Lexception_in_native
1887
1888    // Tear down the alloca.
1889    mov sp, x28
1890
    // Call the method exit hook if the runtime instance has exit hooks enabled.
1891    LOAD_RUNTIME_INSTANCE x1
1892    ldrb w1, [x1, #RUN_EXIT_HOOKS_OFFSET_FROM_RUNTIME_INSTANCE]
1893    cbnz w1, .Lcall_method_exit_hook
1894.Lcall_method_exit_hook_done:
1895
1896    // Tear down the callee-save frame.
1897    CFI_REMEMBER_STATE
1898    .cfi_def_cfa_register sp
1899    // Restore callee-saves and LR as in `RESTORE_SAVE_REFS_AND_ARGS_FRAME`
1900    // but do not restore argument registers.
1901    // Note: Likewise, we could avoid restoring X20 in the case of Baker
1902    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
1903    // later; but it's not worth handling this special case.
    // The hard-coded offsets below are tied to this frame size; fail the build if it changes.
1904#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 224)
1905#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM64) size not as expected."
1906#endif
1907    RESTORE_REG x20, 136
1908    RESTORE_TWO_REGS x21, x22, 144
1909    RESTORE_TWO_REGS x23, x24, 160
1910    RESTORE_TWO_REGS x25, x26, 176
1911    RESTORE_TWO_REGS x27, x28, 192
1912    RESTORE_TWO_REGS x29, xLR, 208
1913    // Remove the frame.
1914    DECREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS
1915
1916    REFRESH_MARKING_REGISTER
1917
1918    // store into fpr, for when it's a fpr return...
1919    fmov d0, x0
1920    ret
1921
1922    // Undo the unwinding information from above since it doesn't apply below.
1923    CFI_RESTORE_STATE_AND_DEF_CFA x28, FRAME_SIZE_SAVE_REFS_AND_ARGS
1924
1925.Lcall_method_exit_hook:
    // Reached with sp == x28 (the alloca has already been torn down above).
1926    fmov d0, x0
1927    mov x4, FRAME_SIZE_SAVE_REFS_AND_ARGS
1928    bl art_quick_method_exit_hook
1929    b .Lcall_method_exit_hook_done
1930
1931.Lexception_in_native:
1932    // Move to x1 then sp to please assembler.
1933    ldr x1, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
1934    add sp, x1, #-1  // Remove the GenericJNI tag.
1935    bl art_deliver_pending_exception
1936END art_quick_generic_jni_trampoline
1937
// Stand-alone entrypoint that only expands DELIVER_PENDING_EXCEPTION. It is called with `bl`
// from the exception path of art_quick_generic_jni_trampoline.
1938ENTRY art_deliver_pending_exception
1939    # This will create a new save-all frame, required by the runtime.
1940    DELIVER_PENDING_EXCEPTION
1941END art_deliver_pending_exception
1942
1943/*
1944 * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
1945 * of a quick call:
1946 * x0 = method being called/to bridge to.
1947 * x1..x7, d0..d7 = arguments to that method.
 *
 * The 64-bit result of artQuickToInterpreterBridge is returned in x0 and also copied to d0.
1948 */
1949ENTRY art_quick_to_interpreter_bridge
1950    SETUP_SAVE_REFS_AND_ARGS_FRAME         // Set up frame and save arguments.
1951
1952    //  x0 will contain mirror::ArtMethod* method.
1953    mov x1, xSELF                          // Pass Thread::Current().
1954    mov x2, sp                             // Pass SP (address of the frame set up above).
1955
1956    // uint64_t artQuickToInterpreterBridge(mirror::ArtMethod* method, Thread* self,
1957    //                                      mirror::ArtMethod** sp)
1958    bl   artQuickToInterpreterBridge
1959
1960    RESTORE_SAVE_REFS_AND_ARGS_FRAME       // TODO: no need to restore arguments in this case.
1961    REFRESH_MARKING_REGISTER
1962
1963    fmov d0, x0                            // Mirror the result into d0.
1964
1965    RETURN_OR_DELIVER_PENDING_EXCEPTION
1966END art_quick_to_interpreter_bridge
1967
1968/*
1969 * Called to attempt to execute an obsolete method.
 *
 * The stub body comes from the ONE_ARG_RUNTIME_EXCEPTION macro (defined outside this section),
 * instantiated with artInvokeObsoleteMethod as the runtime function.
1970 */
1971ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
1972
1973    /*
1974     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
1975     * will long jump to the upcall with a special exception of -1.
     *
     * x0 is not written before the call; presumably it already holds the
     * DeoptimizationKind argument set by the caller -- confirm against the call sites.
1976     */
1977    .extern artDeoptimizeFromCompiledCode
1978ENTRY art_quick_deoptimize_from_compiled_code
1979    SETUP_SAVE_EVERYTHING_FRAME
1980    mov    x1, xSELF                      // Pass thread.
1981    bl     artDeoptimizeFromCompiledCode  // (DeoptimizationKind, Thread*)
1982    brk 0                                 // Trap if the call above ever returns.
1983END art_quick_deoptimize_from_compiled_code
1984
1985
1986    /*
1987     * String's indexOf.
1988     *
1989     * TODO: Not very optimized.
1990     * On entry:
1991     *    x0:   string object (known non-null)
1992     *    w1:   char to match (known <= 0xFFFF)
1993     *    w2:   Starting offset in string data
     * On exit:
     *    x0:   index of the first matching char at or after the clamped start offset,
     *          or -1 if there is no match
     * Writes x2-x7, xIP0, xIP1 and the condition flags; no stack is used.
1994     */
1995ENTRY art_quick_indexof
1996#if (STRING_COMPRESSION_FEATURE)
1997    ldr   w4, [x0, #MIRROR_STRING_COUNT_OFFSET]
1998#else
1999    ldr   w3, [x0, #MIRROR_STRING_COUNT_OFFSET]
2000#endif
2001    add   x0, x0, #MIRROR_STRING_VALUE_OFFSET
2002#if (STRING_COMPRESSION_FEATURE)
2003    /* w4 holds count (with flag) and w3 holds actual length */
2004    lsr   w3, w4, #1
2005#endif
2006    /* Clamp start to [0..count] */
2007    cmp   w2, #0
2008    csel  w2, wzr, w2, lt
2009    cmp   w2, w3
2010    csel  w2, w3, w2, gt
2011
2012    /* Save a copy to compute result */
2013    mov   x5, x0
2014
2015#if (STRING_COMPRESSION_FEATURE)
    /* Bit 0 of the count word clear selects the byte-per-char loop below. */
2016    tbz   w4, #0, .Lstring_indexof_compressed
2017#endif
2018    /* Build pointer to start of data to compare and pre-bias */
2019    add   x0, x0, x2, lsl #1
2020    sub   x0, x0, #2
2021    /* Compute iteration count */
2022    sub   w2, w3, w2
2023
2024    /*
2025     * At this point we have:
2026     *  x0: start of the data to test
2027     *  w1: char to compare
2028     *  w2: iteration count
2029     *  x5: original start of string data
2030     */
2031
2032    subs  w2, w2, #4
2033    b.lt  .Lindexof_remainder
2034
    /*
     * Unrolled by four. Each load pre-increments x0 by 2, so after the four loads x0
     * points at the fourth char of the group; the .Lmatch_N handlers undo that bias.
     */
2035.Lindexof_loop4:
2036    ldrh  w6, [x0, #2]!
2037    ldrh  w7, [x0, #2]!
2038    ldrh  wIP0, [x0, #2]!
2039    ldrh  wIP1, [x0, #2]!
2040    cmp   w6, w1
2041    b.eq  .Lmatch_0
2042    cmp   w7, w1
2043    b.eq  .Lmatch_1
2044    cmp   wIP0, w1
2045    b.eq  .Lmatch_2
2046    cmp   wIP1, w1
2047    b.eq  .Lmatch_3
2048    subs  w2, w2, #4
2049    b.ge  .Lindexof_loop4
2050
2051.Lindexof_remainder:
    /* Undo the bias of 4; w2 is now the number of chars left (0..3). */
2052    adds  w2, w2, #4
2053    b.eq  .Lindexof_nomatch
2054
2055.Lindexof_loop1:
2056    ldrh  w6, [x0, #2]!
2057    cmp   w6, w1
    /* x0 points at the char just loaded, which is the case .Lmatch_3 handles. */
2058    b.eq  .Lmatch_3
2059    subs  w2, w2, #1
2060    b.ne  .Lindexof_loop1
2061
2062.Lindexof_nomatch:
2063    mov   x0, #-1
2064    ret
2065
    /*
     * Match handlers: step x0 back to the matching char, subtract the data start (x5)
     * and halve the byte offset to get the char index.
     */
2066.Lmatch_0:
2067    sub   x0, x0, #6
2068    sub   x0, x0, x5
2069    asr   x0, x0, #1
2070    ret
2071.Lmatch_1:
2072    sub   x0, x0, #4
2073    sub   x0, x0, x5
2074    asr   x0, x0, #1
2075    ret
2076.Lmatch_2:
2077    sub   x0, x0, #2
2078    sub   x0, x0, x5
2079    asr   x0, x0, #1
2080    ret
2081.Lmatch_3:
2082    sub   x0, x0, x5
2083    asr   x0, x0, #1
2084    ret
2085#if (STRING_COMPRESSION_FEATURE)
2086   /*
2087    * Comparing compressed string character-per-character with
2088    * input character
2089    */
2090.Lstring_indexof_compressed:
    /* One byte per char: pre-bias the pointer by 1 and compute the iteration count. */
2091    add   x0, x0, x2
2092    sub   x0, x0, #1
2093    sub   w2, w3, w2
2094.Lstring_indexof_compressed_loop:
2095    subs  w2, w2, #1
2096    b.lt  .Lindexof_nomatch
2097    ldrb  w6, [x0, #1]!
2098    cmp   w6, w1
2099    b.eq  .Lstring_indexof_compressed_matched
2100    b     .Lstring_indexof_compressed_loop
2101.Lstring_indexof_compressed_matched:
    /* The byte offset from the data start is the char index. */
2102    sub   x0, x0, x5
2103    ret
2104#endif
2105END art_quick_indexof
2106
    /*
     * Calls artStringBuilderAppend(uint32_t, const uint32_t*, Thread*). x0 is passed through
     * unchanged as the first argument. The second argument is the address one pointer-size
     * above the end of the save-refs-only frame, i.e. within the caller's stack area.
     */
2107    .extern artStringBuilderAppend
2108ENTRY art_quick_string_builder_append
2109    SETUP_SAVE_REFS_ONLY_FRAME          // save callee saves in case of GC
2110    add    x1, sp, #(FRAME_SIZE_SAVE_REFS_ONLY + __SIZEOF_POINTER__)  // pass args
2111    mov    x2, xSELF                    // pass Thread::Current
2112    bl     artStringBuilderAppend       // (uint32_t, const uint32_t*, Thread*)
2113    RESTORE_SAVE_REFS_ONLY_FRAME
2114    REFRESH_MARKING_REGISTER
2115    RETURN_IF_RESULT_IS_NON_ZERO_OR_DEOPT_OR_DELIVER
2116END art_quick_string_builder_append
2117
2118    /*
2119     * Create a function `name` calling the ReadBarrier::Mark routine,
2120     * getting its argument and returning its result through W register
2121     * `wreg` (corresponding to X register `xreg`), saving and restoring
2122     * all caller-save registers.
2123     *
2124     * The generated function follows a non-standard runtime calling convention:
2125     * - register `wreg` (which may be different from W0) is used to pass the (sole) argument,
2126     * - register `wreg` (which may be different from W0) is used to return the result,
2127     * - all other registers are callee-save (the values they hold are preserved).
     *
     * Exception: IP0 (x16) is used as a temporary and is neither saved nor restored.
     *
     * Paths through the generated function:
     * - null reference, or mark bit already set: return `wreg` unchanged;
     * - lock word holds a forwarding address: return the decoded address in `wreg`;
     * - otherwise: call artReadBarrierMark(obj) and return its result in `wreg`.
2128     */
2129.macro READ_BARRIER_MARK_REG name, wreg, xreg
2130ENTRY \name
2131    // Reference is null, no work to do at all.
2132    cbz \wreg, .Lret_rb_\name
2133    // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
2134    ldr   wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
2135    tbz   wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lnot_marked_rb_\name
2136.Lret_rb_\name:
2137    ret
2138.Lnot_marked_rb_\name:
2139    // Check if the top two bits are one, if this is the case it is a forwarding address.
    // (lock_word & (lock_word << 1)) has bit 31 set only when bits 31 and 30 are both set,
    // and `tst` copies that bit into the N flag tested by `bmi`.
2140    tst   wIP0, wIP0, lsl #1
2141    bmi   .Lret_forwarding_address\name
2142.Lslow_rb_\name:
2143    /*
2144     * Allocate 44 stack slots * 8 = 352 bytes:
2145     * - 19 slots for core registers X0-15, X17, X19, LR
2146     * - 1 slot padding
2147     * - 24 slots for floating-point registers D0-D7 and D16-D31
2148     */
2149    // We must not clobber IP1 since code emitted for HLoadClass and HLoadString
2150    // relies on IP1 being preserved.
2151    // Save all potentially live caller-save core registers.
2152    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 352
2153    SAVE_TWO_REGS  x2,  x3, 16
2154    SAVE_TWO_REGS  x4,  x5, 32
2155    SAVE_TWO_REGS  x6,  x7, 48
2156    SAVE_TWO_REGS  x8,  x9, 64
2157    SAVE_TWO_REGS x10, x11, 80
2158    SAVE_TWO_REGS x12, x13, 96
2159    SAVE_TWO_REGS x14, x15, 112
2160    SAVE_TWO_REGS x17, x19, 128  // Skip x16, i.e. IP0, and x18, the platform register.
2161    SAVE_REG      xLR,      144  // Save also return address.
2162    // Save all potentially live caller-save floating-point registers.
2163    stp   d0, d1,   [sp, #160]
2164    stp   d2, d3,   [sp, #176]
2165    stp   d4, d5,   [sp, #192]
2166    stp   d6, d7,   [sp, #208]
2167    stp   d16, d17, [sp, #224]
2168    stp   d18, d19, [sp, #240]
2169    stp   d20, d21, [sp, #256]
2170    stp   d22, d23, [sp, #272]
2171    stp   d24, d25, [sp, #288]
2172    stp   d26, d27, [sp, #304]
2173    stp   d28, d29, [sp, #320]
2174    stp   d30, d31, [sp, #336]
2175
    // The moves to and from w0 are assembled only when `wreg` is not w0 itself.
2176    .ifnc \wreg, w0
2177      mov   w0, \wreg                   // Pass arg1 - obj from `wreg`
2178    .endif
2179    bl    artReadBarrierMark            // artReadBarrierMark(obj)
2180    .ifnc \wreg, w0
2181      mov   \wreg, w0                   // Return result into `wreg`
2182    .endif
2183
2184    // Restore core regs, except `xreg`, as `wreg` is used to return the
2185    // result of this function (simply remove it from the stack instead).
2186    POP_REGS_NE x0, x1,   0,   \xreg
2187    POP_REGS_NE x2, x3,   16,  \xreg
2188    POP_REGS_NE x4, x5,   32,  \xreg
2189    POP_REGS_NE x6, x7,   48,  \xreg
2190    POP_REGS_NE x8, x9,   64,  \xreg
2191    POP_REGS_NE x10, x11, 80,  \xreg
2192    POP_REGS_NE x12, x13, 96,  \xreg
2193    POP_REGS_NE x14, x15, 112, \xreg
2194    POP_REGS_NE x17, x19, 128, \xreg
2195    POP_REG_NE  xLR,      144, \xreg  // Restore also return address.
2196    // Restore floating-point registers.
2197    ldp   d0, d1,   [sp, #160]
2198    ldp   d2, d3,   [sp, #176]
2199    ldp   d4, d5,   [sp, #192]
2200    ldp   d6, d7,   [sp, #208]
2201    ldp   d16, d17, [sp, #224]
2202    ldp   d18, d19, [sp, #240]
2203    ldp   d20, d21, [sp, #256]
2204    ldp   d22, d23, [sp, #272]
2205    ldp   d24, d25, [sp, #288]
2206    ldp   d26, d27, [sp, #304]
2207    ldp   d28, d29, [sp, #320]
2208    ldp   d30, d31, [sp, #336]
2209    // Remove frame and return.
2210    DECREASE_FRAME 352
2211    ret
2212.Lret_forwarding_address\name:
2213    // Shift left by the forwarding address shift. This clears out the state bits since they are
2214    // in the top 2 bits of the lock word.
2215    lsl   \wreg, wIP0, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
2216    ret
2217END \name
2218.endm
2219
// Instantiate the per-register read barrier mark entrypoints
// art_quick_read_barrier_mark_reg00 ... art_quick_read_barrier_mark_reg29.
// Entrypoint regNN receives the reference in wN and hands the result back in
// the same register. No entrypoint is generated for register 16 (ip0 is
// blocked) or register 18 (x18 is blocked).

// Registers 0-9: the entrypoint suffix is zero-padded (reg00 ... reg09).
.irp num, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg0\num, w\num, x\num
.endr

// Registers 10-29, leaving out 16 (ip0) and 18 (x18).
.irp num, 10, 11, 12, 13, 14, 15, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg\num, w\num, x\num
.endr
2250
2251
// Invoke `macro_to_use` with a single register name: `w` when `xreg` evaluates
// to zero, `x` for any non-zero `xreg`.
.macro SELECT_X_OR_W_FOR_MACRO macro_to_use, x, w, xreg
    .ifeq \xreg
      \macro_to_use \w
    .else
      \macro_to_use \x
    .endif
.endm
2259
// Emit 32 expansions, one per register number 0-31 in ascending order:
//   - `macro_for_register <reg>` for registers 0-15 and 19-29, where <reg> is
//     the x-name when `xreg` is non-zero and the w-name otherwise;
//   - `macro_for_reserved_register` (no argument) for the reserved registers
//     16 (IP0), 17 (IP1), 18, lr and sp.
.macro FOR_REGISTERS macro_for_register, macro_for_reserved_register, xreg
    .irp num, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
      SELECT_X_OR_W_FOR_MACRO \macro_for_register, x\num, w\num, \xreg
    .endr
    \macro_for_reserved_register  // IP0 is reserved
    \macro_for_reserved_register  // IP1 is reserved
    \macro_for_reserved_register  // x18 is reserved
    .irp num, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29
      SELECT_X_OR_W_FOR_MACRO \macro_for_register, x\num, w\num, \xreg
    .endr
    \macro_for_reserved_register  // lr is reserved
    \macro_for_reserved_register  // sp is reserved
.endm
2294
// FOR_REGISTERS specialised to hand 64-bit (x) register names to
// `macro_for_register`.
.macro FOR_XREGISTERS macro_for_register, macro_for_reserved_register
    // The trailing 1 is the `xreg` flag of FOR_REGISTERS: pick x-names.
    FOR_REGISTERS \macro_for_register, \macro_for_reserved_register, 1
.endm
2298
// FOR_REGISTERS specialised to hand 32-bit (w) register names to
// `macro_for_register`.
.macro FOR_WREGISTERS macro_for_register, macro_for_reserved_register
    // The trailing 0 is the `xreg` flag of FOR_REGISTERS: pick w-names.
    FOR_REGISTERS \macro_for_register, \macro_for_reserved_register, 0
.endm
2302
// Two BRK #0 instructions (8 bytes). Passed to FOR_REGISTERS as the expansion
// for reserved registers.
.macro BRK0_BRK0
    .rept 2
    brk 0
    .endr
.endm
2307
2308#if BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
2309#error "Array and field introspection code sharing requires same LDR offset."
2310#endif
// One array-load switch case: load the 32-bit reference at
// xIP0 + (index_reg << 2) into wIP0, then continue at the main introspection
// entrypoint. The case is exactly two instructions (8 bytes); it is expanded
// once per register by FOR_XREGISTERS to build the table at
// art_quick_read_barrier_mark_introspection_arrays.
//   index_reg - 64-bit register holding the array index.
2311.macro INTROSPECTION_ARRAY_LOAD index_reg
2312    ldr   wIP0, [xIP0, \index_reg, lsl #2]
2313    b     art_quick_read_barrier_mark_introspection
2314.endm
2315
// One return switch case: copy the reference from wIP0 into the 32-bit
// register `reg` and branch to the address in LR. Despite the macro name, the
// branch is a plain BR, not a BL. The case is exactly two instructions
// (8 bytes).
.macro MOV_WIP0_TO_WREG_AND_BL_LR reg
    mov   \reg, wIP0
    br    xLR  // Do not use RET as we do not enter the entrypoint with "BL".
.endm
2320
// Slow path shared by the introspection entrypoints: calls
// artReadBarrierMark() on the reference held in IP0 and leaves through
// .Lmark_introspection_return_switch, which moves wIP0 into the destination
// register and branches to LR.
//
// Parameter:
//   ldr_offset - offset from LR of the instruction in generated code whose low
//                5 bits select the return switch case.
//
// On entry xIP0 holds the reference. X0-X15, X19, LR, D0-D7 and D16-D31 are
// saved around the call and restored afterwards. IP0 carries the result and
// IP1 is clobbered.
2321.macro READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH ldr_offset
2322    /*
2323     * Allocate 42 stack slots * 8 = 336 bytes:
2324     * - 18 slots for core registers X0-15, X19, LR
2325     * - 24 slots for floating-point registers D0-D7 and D16-D31
2326     */
2327    // Save all potentially live caller-save core registers.
2328    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 336
2329    SAVE_TWO_REGS  x2,  x3, 16
2330    SAVE_TWO_REGS  x4,  x5, 32
2331    SAVE_TWO_REGS  x6,  x7, 48
2332    SAVE_TWO_REGS  x8,  x9, 64
2333    SAVE_TWO_REGS x10, x11, 80
2334    SAVE_TWO_REGS x12, x13, 96
2335    SAVE_TWO_REGS x14, x15, 112
2336    // Skip x16, x17, i.e. IP0, IP1, and x18, the platform register.
2337    SAVE_TWO_REGS x19, xLR, 128       // Save return address.
2338    // Save all potentially live caller-save floating-point registers.
2339    stp   d0, d1,   [sp, #144]
2340    stp   d2, d3,   [sp, #160]
2341    stp   d4, d5,   [sp, #176]
2342    stp   d6, d7,   [sp, #192]
2343    stp   d16, d17, [sp, #208]
2344    stp   d18, d19, [sp, #224]
2345    stp   d20, d21, [sp, #240]
2346    stp   d22, d23, [sp, #256]
2347    stp   d24, d25, [sp, #272]
2348    stp   d26, d27, [sp, #288]
2349    stp   d28, d29, [sp, #304]
2350    stp   d30, d31, [sp, #320]
2351
2352    mov   x0, xIP0                    // Pass arg0 - the reference from IP0.
2353    bl    artReadBarrierMark          // artReadBarrierMark(obj)
2354    mov   xIP0, x0                    // Keep the result in IP0 for the return switch.
2355
2356    // Restore core regs, except x0 and x1 as the return register switch case
2357    // address calculation is smoother with an extra register.
2358    RESTORE_TWO_REGS  x2,  x3, 16
2359    RESTORE_TWO_REGS  x4,  x5, 32
2360    RESTORE_TWO_REGS  x6,  x7, 48
2361    RESTORE_TWO_REGS  x8,  x9, 64
2362    RESTORE_TWO_REGS x10, x11, 80
2363    RESTORE_TWO_REGS x12, x13, 96
2364    RESTORE_TWO_REGS x14, x15, 112
2365    // Skip x16, x17, i.e. IP0, IP1, and x18, the platform register.
2366    RESTORE_TWO_REGS x19, xLR, 128    // Restore return address.
2367    // Restore caller-save floating-point registers.
2368    ldp   d0, d1,   [sp, #144]
2369    ldp   d2, d3,   [sp, #160]
2370    ldp   d4, d5,   [sp, #176]
2371    ldp   d6, d7,   [sp, #192]
2372    ldp   d16, d17, [sp, #208]
2373    ldp   d18, d19, [sp, #224]
2374    ldp   d20, d21, [sp, #240]
2375    ldp   d22, d23, [sp, #256]
2376    ldp   d24, d25, [sp, #272]
2377    ldp   d26, d27, [sp, #288]
2378    ldp   d28, d29, [sp, #304]
2379    ldp   d30, d31, [sp, #320]
2380
    // x0 is not restored yet, so it serves as scratch here. The low 5 bits of
    // the loaded instruction are inserted at bit 3 of the switch base, i.e.
    // scaled by the 8-byte size of one switch case.
    // NOTE(review): this is a 64-bit load although only bits 0-4 are used;
    // confirm that is intentional.
2381    ldr   x0, [lr, #\ldr_offset]      // Load the instruction.
2382    adr   xIP1, .Lmark_introspection_return_switch
2383    bfi   xIP1, x0, #3, #5            // Calculate switch case address.
    // Restore the last two core registers and release the whole frame.
2384    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 336
2385    br    xIP1
2386.endm
2387
2388    /*
2389     * Use introspection to load a reference from the same address as the LDR
2390     * instruction in generated code would load (unless loaded by the thunk,
2391     * see below), call ReadBarrier::Mark() with that reference if needed
2392     * and return it in the same register as the LDR instruction would load.
2393     *
2394     * The entrypoint is called through a thunk that differs across load kinds.
2395     * For field and array loads the LDR instruction in generated code follows
2396     * the branch to the thunk, i.e. the LDR is at [LR, #-4], and the thunk
2397     * knows the holder and performs the gray bit check, returning to the LDR
2398     * instruction if the object is not gray, so this entrypoint no longer
2399     * needs to know anything about the holder. For GC root loads, the LDR
2400     * instruction in generated code precedes the branch to the thunk (i.e.
2401     * the LDR is at [LR, #-8]) and the thunk does not do the gray bit check.
2402     *
2403     * For field accesses and array loads with a constant index the thunk loads
2404     * the reference into IP0 using introspection and calls the main entrypoint,
2405     * art_quick_read_barrier_mark_introspection. With heap poisoning enabled,
2406     * the passed reference is poisoned.
2407     *
2408     * For array accesses with a non-constant index, the thunk inserts bits
2409     * 16-21 of the LDR instruction into the entrypoint address, effectively
2410     * calculating a switch case label based on the index register (bits 16-20)
2411     * and adding an extra offset (bit 21 is set) to differentiate from the
2412     * main entrypoint, then moves the base register to IP0 and jumps to the
2413     * switch case. Therefore we need to align the main entrypoint to 512 bytes,
2414     * accounting for a 256-byte offset followed by 32 array entrypoints
2415     * starting at art_quick_read_barrier_mark_introspection_arrays, each
2416     * containing an LDR (register) and a branch to the main entrypoint.
2417     *
2418     * For GC root accesses we cannot use the main entrypoint because of the
2419     * different offset where the LDR instruction in generated code is located.
2420     * (And even with heap poisoning enabled, GC roots are not poisoned.)
2421     * To re-use the same entrypoint pointer in generated code, we make sure
2422     * that the gc root entrypoint (a copy of the entrypoint with a different
2423     * offset for introspection loads) is located at a known offset (768 bytes,
2424     * or BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET) from the main
2425     * entrypoint and the GC root thunk adjusts the entrypoint pointer, moves
2426     * the root register to IP0 and jumps to the customized entrypoint,
2427     * art_quick_read_barrier_mark_introspection_gc_roots. The thunk also
2428     * performs all the fast-path checks, so we need just the slow path.
2429     * The UnsafeCASObject intrinsic also uses the GC root entrypoint, with a
2430     * MOV instead of an LDR; the destination register is in the same bits.
2431     *
2432     * The code structure is
2433     *   art_quick_read_barrier_mark_introspection:
2434     *     Up to 256 bytes for the main entrypoint code.
2435     *     Padding to 256 bytes if needed.
2436     *   art_quick_read_barrier_mark_introspection_arrays:
2437     *     Exactly 256 bytes for array load switch cases (32x2 instructions).
2438     *   .Lmark_introspection_return_switch:
2439     *     Exactly 256 bytes for return switch cases (32x2 instructions).
2440     *   art_quick_read_barrier_mark_introspection_gc_roots:
2441     *     GC root entrypoint code.
2442     */
2443ENTRY_ALIGNED art_quick_read_barrier_mark_introspection, 512
2444    // At this point, IP0 contains the reference, IP1 can be freely used.
2445    // For heap poisoning, the reference is poisoned, so unpoison it first.
2446    UNPOISON_HEAP_REF wIP0
2447    // If reference is null, just return it in the right register.
2448    cbz   wIP0, .Lmark_introspection_return
2449    // Use wIP1 as temp and check the mark bit of the reference.
2450    ldr   wIP1, [xIP0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
2451    tbz   wIP1, #LOCK_WORD_MARK_BIT_SHIFT, .Lmark_introspection_unmarked
    // Mark bit set: fall through and return the reference unchanged.
2452.Lmark_introspection_return:
2453    // Without an extra register for the return switch case address calculation,
2454    // we exploit the high word of the xIP0 to temporarily store the ref_reg*8,
2455    // so the return switch below must move wIP0 instead of xIP0 to the register.
2456    ldr   wIP1, [lr, #BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET]  // Load the instruction.
2457    bfi   xIP0, xIP1, #(32 + 3), #5   // Extract ref_reg*8 to high word in xIP0.
2458    adr   xIP1, .Lmark_introspection_return_switch
    // Replace the low 8 bits of the switch base with bits 32-39 of xIP0.
    // NOTE(review): this presumably relies on bits 32-34 of xIP0 being zero
    // and on the return switch being 256-byte aligned - confirm.
2459    bfxil xIP1, xIP0, #32, #8         // Calculate return switch case address.
2460    br    xIP1
2461.Lmark_introspection_unmarked:
2462    // Check if the top two bits are one, if this is the case it is a forwarding address.
    // The N flag ends up set only when bits 31 and 30 of the lock word are both set.
2463    tst   wIP1, wIP1, lsl #1
2464    bmi   .Lmark_introspection_forwarding_address
    // Otherwise take the slow path, which calls artReadBarrierMark().
2465    READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET
2466
2467.Lmark_introspection_forwarding_address:
2468    // Shift left by the forwarding address shift. This clears out the state bits since they are
2469    // in the top 2 bits of the lock word.
2470    lsl   wIP0, wIP1, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
2471    b .Lmark_introspection_return
2472
2473    // We're very close to the allotted 256B for the entrypoint code before the
2474    // array switch cases. Should we go a little bit over the limit, we can
2475    // move some code after the array switch cases and return switch cases.
2476    .balign 256
2477    .hidden art_quick_read_barrier_mark_introspection_arrays
2478    .global art_quick_read_barrier_mark_introspection_arrays
2479art_quick_read_barrier_mark_introspection_arrays:
    // Array load switch: one two-instruction case per register number, BRK pairs
    // in the slots of reserved registers.
2480    FOR_XREGISTERS INTROSPECTION_ARRAY_LOAD, BRK0_BRK0
2481.Lmark_introspection_return_switch:
    // Return switch: one two-instruction case per register number, BRK pairs in
    // the slots of reserved registers.
2482    FOR_WREGISTERS MOV_WIP0_TO_WREG_AND_BL_LR, BRK0_BRK0
2483    .hidden art_quick_read_barrier_mark_introspection_gc_roots
2484    .global art_quick_read_barrier_mark_introspection_gc_roots
2485art_quick_read_barrier_mark_introspection_gc_roots:
    // GC root entrypoint: slow path only, using the GC root LDR offset.
2486    READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET
2487END art_quick_read_barrier_mark_introspection
2488
.extern artInvokePolymorphic
// Sets up a kSaveRefsAndArgs frame and calls
// artInvokePolymorphic(receiver, thread, save_area). The receiver arrives in x1.
// The value returned in x0 is also copied to d0.
ENTRY art_quick_invoke_polymorphic
    SETUP_SAVE_REFS_AND_ARGS_FRAME      // Save callee saves in case allocation triggers GC.
    mov     x2, sp                      // arg2: SP, the save area that was just set up.
    mov     x0, x1                      // arg0: receiver; read x1 before it is overwritten.
    mov     x1, xSELF                   // arg1: Thread::Current().
    bl      artInvokePolymorphic        // artInvokePolymorphic(receiver, thread, save_area)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    fmov    d0, x0                      // Mirror the x0 result into the floating return register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END art_quick_invoke_polymorphic
2501
.extern artInvokeCustom
// Sets up a kSaveRefsAndArgs frame and calls
// artInvokeCustom(call_site_idx, thread, save_area). call_site_idx arrives in
// x0 and is passed through untouched. The value returned in x0 is also copied
// to d0.
ENTRY art_quick_invoke_custom
    SETUP_SAVE_REFS_AND_ARGS_FRAME    // Save callee saves in case allocation triggers GC.
    mov     x2, sp                    // arg2: SP, the save area that was just set up.
    mov     x1, xSELF                 // arg1: Thread::Current().
                                      // arg0: call_site_idx is already in x0.
    bl      artInvokeCustom           // artInvokeCustom(call_site_idx, thread, save_area)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    fmov    d0, x0                    // Mirror the x0 result into the double result register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END art_quick_invoke_custom
2514
// Assembly wrapper around ExecuteSwitchImpl that specifies the DEX PC for
// unwinding.
//   x0: context pointer for ExecuteSwitchImpl (passed through unchanged).
//   x1: pointer to the templated ExecuteSwitchImpl to call.
//   x2: value of the DEX PC (memory address of the method's bytecode).
ENTRY ExecuteSwitchImplAsm
    SAVE_TWO_REGS_INCREASE_FRAME x19, xLR, 16     // Spill x19 and the return address.
    mov x19, x2                                   // Hold the DEX PC in x19 across the call.
    CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* x0 */, 19 /* x19 */, 0)
    blr x1                                        // Call the wrapped method; x0 is still the context.
    RESTORE_TWO_REGS_DECREASE_FRAME x19, xLR, 16
    ret
END ExecuteSwitchImplAsm
2527
// Helper for art_quick_update_inline_cache: probe one 32-bit slot of the
// inline cache at x8 for the class in w0.
//   slot        - number of this slot; defines the local label .Lentry<slot>.
//   next_slot   - number of the slot to try when this one holds another class.
//   byte_offset - offset of this slot from INLINE_CACHE_CLASSES_OFFSET.
// Branches to .Ldone when the slot already holds w0 or after w0 was stored
// into an empty slot. Clobbers w9, x10 and the condition flags.
.macro INLINE_CACHE_TRY_SLOT slot, next_slot, byte_offset
.Lentry\slot:
    ldr   w9, [x8, #(INLINE_CACHE_CLASSES_OFFSET + \byte_offset)]
    cmp   w9, w0
    b.eq  .Ldone                    // The class is already recorded in this slot.
    cbnz  w9, .Lentry\next_slot     // Slot holds a different class: try the next one.
    // The slot looked empty: try to claim it with an exclusive load/store pair.
    add   x10, x8, #(INLINE_CACHE_CLASSES_OFFSET + \byte_offset)
    ldxr  w9, [x10]
    cbnz  w9, .Lentry\slot          // No longer empty: examine this slot again.
    stxr  w9, w0, [x10]             // w9 is the store status, zero on success.
    cbz   w9, .Ldone
    b     .Lentry\slot              // The exclusive store failed: retry this slot.
.endm

// x0 contains the class, x8 contains the inline cache. x9-x15 can be used.
ENTRY art_quick_update_inline_cache
#if (INLINE_CACHE_SIZE != 5)
#error "INLINE_CACHE_SIZE not as expected."
#endif
#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
    // Don't update the cache if we are marking.
    cbnz wMR, .Ldone
#endif
    // Slots 1-4 are probed in order and claimed only when empty.
    INLINE_CACHE_TRY_SLOT 1, 2, 0
    INLINE_CACHE_TRY_SLOT 2, 3, 4
    INLINE_CACHE_TRY_SLOT 3, 4, 8
    INLINE_CACHE_TRY_SLOT 4, 5, 12
.Lentry5:
    // Unconditionally store, the inline cache is megamorphic.
    str  w0, [x8, #INLINE_CACHE_CLASSES_OFFSET+16]
.Ldone:
    ret
END art_quick_update_inline_cache
2587
// On entry, method is at the bottom of the stack.
// Saves everything, calls artCompileOptimized(ArtMethod*, Thread*) and returns
// with all registers restored.
ENTRY art_quick_compile_optimized
    SETUP_SAVE_EVERYTHING_FRAME
    mov x1, xSELF                             // arg1: Thread::Current
    ldr x0, [sp, #FRAME_SIZE_SAVE_EVERYTHING] // arg0: ArtMethod, read from just past the frame
    bl     artCompileOptimized                // (ArtMethod*, Thread*)
    RESTORE_SAVE_EVERYTHING_FRAME
    // We don't need to restore the marking register here, as
    // artCompileOptimized doesn't allow thread suspension.
    ret
END art_quick_compile_optimized
2599
.extern artMethodEntryHook
// Saves everything, calls artMethodEntryHook(ArtMethod*, Thread*, SP) and
// returns with all registers restored. The ArtMethod* is read from the stack
// slot just past the save-everything frame.
ENTRY art_quick_method_entry_hook
    SETUP_SAVE_EVERYTHING_FRAME

    mov x2, sp                                // arg2: SP
    mov x1, xSELF                             // arg1: Thread::Current
    ldr x0, [sp, #FRAME_SIZE_SAVE_EVERYTHING] // arg0: ArtMethod*
    bl  artMethodEntryHook                    // (ArtMethod*, Thread*, SP)

    RESTORE_SAVE_EVERYTHING_FRAME             // Note: will restore xSELF
    REFRESH_MARKING_REGISTER
    ret
END art_quick_method_entry_hook
2613
.extern artMethodExitHook
// Saves everything, calls
// artMethodExitHook(Thread*, ArtMethod**, gpr_res*, fpr_res*, frame_size) and
// returns with all registers restored. The result pointers point into the
// save-everything frame.
ENTRY art_quick_method_exit_hook
    SETUP_SAVE_EVERYTHING_FRAME

    // frame_size is passed from JITed code in x4 and is left untouched here.
    mov x0, xSELF                             // arg0: Thread::Current
    add x1, sp, #FRAME_SIZE_SAVE_EVERYTHING   // arg1: ArtMethod**
    add x2, sp, #272                          // arg2: integer result ptr in kSaveEverything frame
    add x3, sp, #16                           // arg3: floating-point result ptr in kSaveEverything frame
    bl  artMethodExitHook                     // (Thread*, ArtMethod**, gpr_res*, fpr_res*,
                                              // frame_size)

    // Normal return.
    RESTORE_SAVE_EVERYTHING_FRAME
    REFRESH_MARKING_REGISTER
    ret
END art_quick_method_exit_hook
2631