/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the Arm64 ISA. */

#include "codegen_arm64.h"

#include "arm64_lir.h"
#include "art_method.h"
#include "base/logging.h"
#include "dex/mir_graph.h"
#include "dex/quick/dex_file_to_method_inliner_map.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "driver/compiler_driver.h"
#include "driver/compiler_options.h"
#include "gc/accounting/card_table.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "mirror/object_array-inl.h"
#include "utils/dex_cache_arrays_layout-inl.h"

namespace art {

/*
 * The sparse table in the literal pool is an array of <key,displacement>
 * pairs.  For each entry, we load the key and displacement as a pair using ldp.
 * The test loop will look something like:
 *
 *   adr     r_base, <table>
 *   ldr     r_val, [rA64_SP, v_reg_off]
 *   mov     r_idx, #table_size
 * loop:
 *   cbz     r_idx, quit
 *   ldp     r_key, r_disp, [r_base], #8
 *   sub     r_idx, #1
 *   cmp     r_val, r_key
 *   b.ne    loop
 *   adr     r_base, #0        ; This is the instruction from which we compute displacements
 *   add     r_base, r_disp
 *   br      r_base
 * quit:
 */
void Arm64Mir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) {
  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
  // Add the table to the list - we'll process it later
  SwitchTable *tab_rec =
      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
  tab_rec->switch_mir = mir;
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint32_t size = table[1];
  switch_tables_.push_back(tab_rec);

  // Get the switch value
  rl_src = LoadValue(rl_src, kCoreReg);
  RegStorage r_base = AllocTempWide();
  // Allocate key and disp temps.
  RegStorage r_key = AllocTemp();
  RegStorage r_disp = AllocTemp();
  // Materialize a pointer to the switch table
  NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, WrapPointer(tab_rec));
  // Set up r_idx
  RegStorage r_idx = AllocTemp();
  LoadConstant(r_idx, size);

  // Entry of loop.
  LIR* loop_entry = NewLIR0(kPseudoTargetLabel);
  LIR* branch_out = NewLIR2(kA64Cbz2rt, r_idx.GetReg(), 0);

  // Load next key/disp.
  NewLIR4(kA64LdpPost4rrXD, r_key.GetReg(), r_disp.GetReg(), r_base.GetReg(), 2);
  OpRegRegImm(kOpSub, r_idx, r_idx, 1);

  // Go to next case, if key does not match.
  OpRegReg(kOpCmp, r_key, rl_src.reg);
  OpCondBranch(kCondNe, loop_entry);

  // Key does match: branch to case label.
  LIR* switch_label = NewLIR3(kA64Adr2xd, r_base.GetReg(), 0, -1);
  tab_rec->anchor = switch_label;

  // Add displacement to base branch address and go!
  OpRegRegRegExtend(kOpAdd, r_base, r_base, As64BitReg(r_disp), kA64Sxtw, 0U);
  NewLIR1(kA64Br1x, r_base.GetReg());

  // Loop exit label.
  LIR* loop_exit = NewLIR0(kPseudoTargetLabel);
  branch_out->target = loop_exit;
}

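/*
 * The packed switch loads the displacement directly from the table. A rough sketch of the
 * generated sequence (mirroring the code below):
 *
 *   adr     x_table, <table>
 *   sub     w_key, w_val, #low_key       ; only if low_key != 0
 *   cmp     w_key, #size-1
 *   b.hi    done
 *   ldr     w_disp, [x_table, x_key, lsl #2]
 *   adr     x_base, #0                   ; instruction from which displacements are computed
 *   add     x_base, x_base, w_disp, sxtw
 *   br      x_base
 * done:
 */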
void Arm64Mir2Lir::GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) {
  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
  // Add the table to the list - we'll process it later
  SwitchTable *tab_rec =
      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
  tab_rec->switch_mir = mir;
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint32_t size = table[1];
  switch_tables_.push_back(tab_rec);

  // Get the switch value
  rl_src = LoadValue(rl_src, kCoreReg);
  RegStorage table_base = AllocTempWide();
  // Materialize a pointer to the switch table
  NewLIR3(kA64Adr2xd, table_base.GetReg(), 0, WrapPointer(tab_rec));
  int low_key = s4FromSwitchData(&table[2]);
  RegStorage key_reg;
  // Remove the bias, if necessary
  if (low_key == 0) {
    key_reg = rl_src.reg;
  } else {
    key_reg = AllocTemp();
    OpRegRegImm(kOpSub, key_reg, rl_src.reg, low_key);
  }
  // Bounds check - if < 0 or >= size, fall through to the code after the switch.
  OpRegImm(kOpCmp, key_reg, size - 1);
  LIR* branch_over = OpCondBranch(kCondHi, nullptr);

  // Load the displacement from the switch table
  RegStorage disp_reg = AllocTemp();
  LoadBaseIndexed(table_base, As64BitReg(key_reg), disp_reg, 2, k32);

  // Get base branch address.
  RegStorage branch_reg = AllocTempWide();
  LIR* switch_label = NewLIR3(kA64Adr2xd, branch_reg.GetReg(), 0, -1);
  tab_rec->anchor = switch_label;

  // Add displacement to base branch address and go!
  OpRegRegRegExtend(kOpAdd, branch_reg, branch_reg, As64BitReg(disp_reg), kA64Sxtw, 0U);
  NewLIR1(kA64Br1x, branch_reg.GetReg());

  // branch_over target here
  LIR* target = NewLIR0(kPseudoTargetLabel);
  branch_over->target = target;
}

/*
 * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more
 * details see monitor.cc.
 */
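/*
 * Fast path sketch (mirroring the code below): ldxr the lock word, check that it is unlocked
 * apart from the read barrier bits, merge those bits into the thin lock id and stxr it back.
 * A non-unlocked lock word or a failed store falls through to the pLockObject entrypoint.
 */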
void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
  // x0/w0 = object
  // w1 = thin lock thread id
  // x2 = address of lock word
  // w3 = lock word / store failure
  // TUNING: How much performance do we gain by inlining this, given that we've already
  // flushed all registers?
  FlushAllRegs();
  LoadValueDirectFixed(rl_src, rs_x0);  // = TargetReg(kArg0, kRef)
  LockCallTemps();  // Prepare for explicit register usage
  LIR* null_check_branch = nullptr;
  if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
    null_check_branch = nullptr;  // No null check.
  } else {
    // If the null-check fails it's handled by the slow path, to reduce exception-related metadata.
    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
      null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, nullptr);
    }
  }
  Load32Disp(rs_xSELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
  OpRegRegImm(kOpAdd, rs_x2, rs_x0, mirror::Object::MonitorOffset().Int32Value());
  NewLIR2(kA64Ldxr2rX, rw3, rx2);
  MarkPossibleNullPointerException(opt_flags);
  // Zero out the read barrier bits.
  OpRegRegImm(kOpAnd, rs_w2, rs_w3, LockWord::kReadBarrierStateMaskShiftedToggled);
  LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_w2, 0, nullptr);
  // w3 is zero except for the rb bits here. Copy the read barrier bits into w1.
  OpRegRegReg(kOpOr, rs_w1, rs_w1, rs_w3);
  OpRegRegImm(kOpAdd, rs_x2, rs_x0, mirror::Object::MonitorOffset().Int32Value());
  NewLIR3(kA64Stxr3wrX, rw3, rw1, rx2);
  LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_w3, 0, nullptr);

  LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
  not_unlocked_branch->target = slow_path_target;
  if (null_check_branch != nullptr) {
    null_check_branch->target = slow_path_target;
  }
  // TODO: move to a slow path.
  // Go expensive route - artLockObjectFromCode(obj);
  LoadWordDisp(rs_xSELF, QUICK_ENTRYPOINT_OFFSET(8, pLockObject).Int32Value(), rs_xLR);
  ClobberCallerSave();
  LIR* call_inst = OpReg(kOpBlx, rs_xLR);
  MarkSafepointPC(call_inst);

  LIR* success_target = NewLIR0(kPseudoTargetLabel);
  lock_success_branch->target = success_target;
  GenMemBarrier(kLoadAny);
}

/*
 * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
 * details see monitor.cc. Note that when read barriers are disabled the code below doesn't need
 * ldxr/stxr, as the code holds the lock and can only give away ownership if it is suspended.
 */
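/*
 * Fast path sketch (mirroring the code below): load the lock word, check that it equals this
 * thread's thin lock id once the read barrier bits are masked out (i.e. thin-locked exactly
 * once by this thread), then store back a lock word holding only the read barrier bits.
 * With read barriers enabled the load/store is done with ldxr/stxr; a mismatch or a failed
 * store falls through to the pUnlockObject entrypoint.
 */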
void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
  // x0/w0 = object
  // w1 = thin lock thread id
  // w2 = lock word
  // TUNING: How much performance do we gain by inlining this, given that we've already
  // flushed all registers?
  FlushAllRegs();
  LoadValueDirectFixed(rl_src, rs_x0);  // Get obj
  LockCallTemps();  // Prepare for explicit register usage
  LIR* null_check_branch = nullptr;
  if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
    null_check_branch = nullptr;  // No null check.
  } else {
    // If the null-check fails it's handled by the slow path, to reduce exception-related metadata.
    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
      null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, nullptr);
    }
  }
  Load32Disp(rs_xSELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
  if (!kUseReadBarrier) {
    Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2);
  } else {
    OpRegRegImm(kOpAdd, rs_x3, rs_x0, mirror::Object::MonitorOffset().Int32Value());
    NewLIR2(kA64Ldxr2rX, rw2, rx3);
  }
  MarkPossibleNullPointerException(opt_flags);
  // Zero out the read barrier bits.
  OpRegRegImm(kOpAnd, rs_w3, rs_w2, LockWord::kReadBarrierStateMaskShiftedToggled);
  // Zero out everything except the read barrier bits.
  OpRegRegImm(kOpAnd, rs_w2, rs_w2, LockWord::kReadBarrierStateMaskShifted);
  LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_w3, rs_w1, nullptr);
  GenMemBarrier(kAnyStore);
  LIR* unlock_success_branch;
  if (!kUseReadBarrier) {
    Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2);
    unlock_success_branch = OpUnconditionalBranch(nullptr);
  } else {
    OpRegRegImm(kOpAdd, rs_x3, rs_x0, mirror::Object::MonitorOffset().Int32Value());
    NewLIR3(kA64Stxr3wrX, rw1, rw2, rx3);
    unlock_success_branch = OpCmpImmBranch(kCondEq, rs_w1, 0, nullptr);
  }
  LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
  slow_unlock_branch->target = slow_path_target;
  if (null_check_branch != nullptr) {
    null_check_branch->target = slow_path_target;
  }
  // TODO: move to a slow path.
  // Go expensive route - artUnlockObjectFromCode(obj);
  LoadWordDisp(rs_xSELF, QUICK_ENTRYPOINT_OFFSET(8, pUnlockObject).Int32Value(), rs_xLR);
  ClobberCallerSave();
  LIR* call_inst = OpReg(kOpBlx, rs_xLR);
  MarkSafepointPC(call_inst);

  LIR* success_target = NewLIR0(kPseudoTargetLabel);
  unlock_success_branch->target = success_target;
}

void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) {
  int ex_offset = Thread::ExceptionOffset<8>().Int32Value();
  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
  LoadRefDisp(rs_xSELF, ex_offset, rl_result.reg, kNotVolatile);
  StoreRefDisp(rs_xSELF, ex_offset, rs_xzr, kNotVolatile);
  StoreValue(rl_dest, rl_result);
}

void Arm64Mir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) {
  RegStorage reg_card_base = AllocTempWide();
  RegStorage reg_card_no = AllocTempWide();  // Needs to be wide as addr is ref=64b
  LoadWordDisp(rs_xSELF, Thread::CardTableOffset<8>().Int32Value(), reg_card_base);
  OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
  // TODO(Arm64): generate "strb wB, [xB, wC, uxtw]" rather than "strb wB, [xB, xC]"?
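  // Dirty the card: card_table_base[addr >> kCardShift] := low byte of card_table_base
  // (the biased card table base is set up so that its low byte is the dirty-card value).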
  StoreBaseIndexed(reg_card_base, reg_card_no, As32BitReg(reg_card_base),
                   0, kUnsignedByte);
  FreeTemp(reg_card_base);
  FreeTemp(reg_card_no);
}

static dwarf::Reg DwarfCoreReg(int num) {
  return dwarf::Reg::Arm64Core(num);
}

void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
  DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0);  // empty stack.

  /*
   * On entry, x0 to x7 are live.  Let the register allocation
   * mechanism know so it doesn't try to use any of them when
   * expanding the frame or flushing.
   * Reserve x8 & x9 for temporaries.
   */
  LockTemp(rs_x0);
  LockTemp(rs_x1);
  LockTemp(rs_x2);
  LockTemp(rs_x3);
  LockTemp(rs_x4);
  LockTemp(rs_x5);
  LockTemp(rs_x6);
  LockTemp(rs_x7);
  LockTemp(rs_xIP0);
  LockTemp(rs_xIP1);

  /* TUNING:
   * Use AllocTemp() and reuse LR if possible to give us freedom in adjusting the number
   * of temp registers.
   */

  /*
   * We can safely skip the stack overflow check if we're
   * a leaf *and* our frame size < fudge factor.
   */
  bool skip_overflow_check = mir_graph_->MethodIsLeaf() &&
      !FrameNeedsStackCheck(frame_size_, kArm64);

  const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm64);
  const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes;
  bool generate_explicit_stack_overflow_check = large_frame ||
      !cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks();
  const int spill_count = num_core_spills_ + num_fp_spills_;
  const int spill_size = (spill_count * kArm64PointerSize + 15) & ~0xf;  // SP 16 byte alignment.
  const int frame_size_without_spills = frame_size_ - spill_size;

  if (!skip_overflow_check) {
    if (generate_explicit_stack_overflow_check) {
      // Load stack limit
      LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_xIP1);
    } else {
      // Implicit stack overflow check.
      // Generate a load from [sp - stack overflow reserved bytes].  If this address is in the
      // stack redzone we will get a segmentation fault.

      // TODO: If the frame size is small enough, is it possible to make this a pre-indexed load,
      //       so that we can avoid the following "sub sp" when spilling?
      OpRegRegImm(kOpSub, rs_x8, rs_sp, GetStackOverflowReservedBytes(kArm64));
      Load32Disp(rs_x8, 0, rs_wzr);
      MarkPossibleStackOverflowException();
    }
  }

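  // SpillRegs() adjusts SP itself and returns how many bytes it allocated; it may fold the
  // whole frame allocation into the spill stores, in which case it returns frame_size_
  // (the DCHECK below allows either).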
  int spilled_already = 0;
  if (spill_size > 0) {
    spilled_already = SpillRegs(rs_sp, core_spill_mask_, fp_spill_mask_, frame_size_);
    DCHECK(spill_size == spilled_already || frame_size_ == spilled_already);
  }

  if (spilled_already != frame_size_) {
    OpRegImm(kOpSub, rs_sp, frame_size_without_spills);
    cfi_.AdjustCFAOffset(frame_size_without_spills);
  }

  if (!skip_overflow_check) {
    if (generate_explicit_stack_overflow_check) {
      class StackOverflowSlowPath: public LIRSlowPath {
       public:
        StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace)
            : LIRSlowPath(m2l, branch),
              sp_displace_(sp_displace) {
        }
        void Compile() OVERRIDE {
          m2l_->ResetRegPool();
          m2l_->ResetDefTracking();
          GenerateTargetLabel(kPseudoThrowTarget);
          // Unwinds stack.
          m2l_->OpRegImm(kOpAdd, rs_sp, sp_displace_);
          m2l_->cfi().AdjustCFAOffset(-sp_displace_);
          m2l_->ClobberCallerSave();
          ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow);
          m2l_->LockTemp(rs_xIP0);
          m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_xIP0);
          m2l_->NewLIR1(kA64Br1x, rs_xIP0.GetReg());
          m2l_->FreeTemp(rs_xIP0);
          m2l_->cfi().AdjustCFAOffset(sp_displace_);
        }

       private:
        const size_t sp_displace_;
      };

      LIR* branch = OpCmpBranch(kCondUlt, rs_sp, rs_xIP1, nullptr);
      AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, frame_size_));
    }
  }

  FlushIns(ArgLocs, rl_method);

  FreeTemp(rs_x0);
  FreeTemp(rs_x1);
  FreeTemp(rs_x2);
  FreeTemp(rs_x3);
  FreeTemp(rs_x4);
  FreeTemp(rs_x5);
  FreeTemp(rs_x6);
  FreeTemp(rs_x7);
  FreeTemp(rs_xIP0);
  FreeTemp(rs_xIP1);
}

void Arm64Mir2Lir::GenExitSequence() {
  cfi_.RememberState();
  /*
   * In the exit path, x0/x1 are live - make sure they aren't
   * allocated by the register utilities as temps.
   */
  LockTemp(rs_x0);
  LockTemp(rs_x1);
  UnspillRegs(rs_sp, core_spill_mask_, fp_spill_mask_, frame_size_);

  // Finally return.
  NewLIR0(kA64Ret);
  // The CFI should be restored for any code that follows the exit block.
  cfi_.RestoreState();
  cfi_.DefCFAOffset(frame_size_);
}

void Arm64Mir2Lir::GenSpecialExitSequence() {
  NewLIR0(kA64Ret);
}

void Arm64Mir2Lir::GenSpecialEntryForSuspend() {
  // Keep 16-byte stack alignment - push x0, i.e. ArtMethod*, lr.
  core_spill_mask_ = (1u << rs_xLR.GetRegNum());
  num_core_spills_ = 1u;
  fp_spill_mask_ = 0u;
  num_fp_spills_ = 0u;
  frame_size_ = 16u;
  core_vmap_table_.clear();
  fp_vmap_table_.clear();
  NewLIR4(WIDE(kA64StpPre4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), -frame_size_ / 8);
  cfi_.AdjustCFAOffset(frame_size_);
  // Do not generate CFI for scratch register x0.
  cfi_.RelOffset(DwarfCoreReg(rxLR), 8);
}

void Arm64Mir2Lir::GenSpecialExitForSuspend() {
  // Pop the frame. (ArtMethod* no longer needed but restore it anyway.)
  NewLIR4(WIDE(kA64LdpPost4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), frame_size_ / 8);
  cfi_.AdjustCFAOffset(-frame_size_);
  cfi_.Restore(DwarfCoreReg(rxLR));
}

static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) {
  // Emit relative calls anywhere when compiling the image; otherwise only within the same dex file.
  return cu->compiler_driver->IsImage() || cu->dex_file == target_method.dex_file;
}

/*
 * Bit of a hack here - in the absence of a real scheduling pass,
 * emit the next instruction in static & direct invoke sequences.
 */
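// Returns the next state, or -1 if there is no instruction to emit for the given state.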
int Arm64Mir2Lir::Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
                                      int state, const MethodReference& target_method,
                                      uint32_t unused_idx ATTRIBUTE_UNUSED,
                                      uintptr_t direct_code, uintptr_t direct_method,
                                      InvokeType type) {
  Arm64Mir2Lir* cg = static_cast<Arm64Mir2Lir*>(cu->cg.get());
  if (info->string_init_offset != 0) {
    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
    switch (state) {
      case 0: {  // Grab target method* from thread pointer
        cg->LoadWordDisp(rs_xSELF, info->string_init_offset, arg0_ref);
        break;
      }
      case 1:  // Grab the code from the method*
        if (direct_code == 0) {
          // kInvokeTgt := arg0_ref->entrypoint
          cg->LoadWordDisp(arg0_ref,
                           ArtMethod::EntryPointFromQuickCompiledCodeOffset(
                               kArm64PointerSize).Int32Value(), cg->TargetPtrReg(kInvokeTgt));
        }
        break;
      default:
        return -1;
    }
  } else if (direct_code != 0 && direct_method != 0) {
    switch (state) {
      case 0:  // Get the current Method* [sets kArg0]
        if (direct_code != static_cast<uintptr_t>(-1)) {
          cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
        } else if (Arm64UseRelativeCall(cu, target_method)) {
          // Defer to linker patch.
        } else {
          cg->LoadCodeAddress(target_method, type, kInvokeTgt);
        }
        if (direct_method != static_cast<uintptr_t>(-1)) {
          cg->LoadConstantWide(cg->TargetReg(kArg0, kRef), direct_method);
        } else {
          cg->LoadMethodAddress(target_method, type, kArg0);
        }
        break;
      default:
        return -1;
    }
  } else {
    bool use_pc_rel = cg->CanUseOpPcRelDexCacheArrayLoad();
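    // When pc-relative dex cache array loads are available, the resolved method is loaded
    // straight from the dex cache arrays (state 2 below) instead of being chased through the
    // current Method*.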
    RegStorage arg0_ref = cg->TargetPtrReg(kArg0);
    switch (state) {
      case 0:  // Get the current Method* [sets kArg0]
        // TUNING: we can save a reg copy if Method* has been promoted.
        if (!use_pc_rel) {
          cg->LoadCurrMethodDirect(arg0_ref);
          break;
        }
        ++state;
        FALLTHROUGH_INTENDED;
      case 1:  // Get method->dex_cache_resolved_methods_
        if (!use_pc_rel) {
          cg->LoadRefDisp(arg0_ref,
                          ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
                          arg0_ref,
                          kNotVolatile);
        }
        // Set up direct code if known.
        if (direct_code != 0) {
          if (direct_code != static_cast<uintptr_t>(-1)) {
            cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
          } else if (Arm64UseRelativeCall(cu, target_method)) {
            // Defer to linker patch.
          } else {
            CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
            cg->LoadCodeAddress(target_method, type, kInvokeTgt);
          }
        }
        if (!use_pc_rel || direct_code != 0) {
          break;
        }
        ++state;
        FALLTHROUGH_INTENDED;
      case 2:  // Grab target method*
        CHECK_EQ(cu->dex_file, target_method.dex_file);
        if (!use_pc_rel) {
          cg->LoadWordDisp(arg0_ref,
                           mirror::Array::DataOffset(kArm64PointerSize).Uint32Value() +
                               target_method.dex_method_index * kArm64PointerSize, arg0_ref);
        } else {
          size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index);
          cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, arg0_ref, true);
        }
        break;
      case 3:  // Grab the code from the method*
        if (direct_code == 0) {
          // kInvokeTgt := arg0_ref->entrypoint
          cg->LoadWordDisp(arg0_ref,
                           ArtMethod::EntryPointFromQuickCompiledCodeOffset(
                               kArm64PointerSize).Int32Value(), cg->TargetPtrReg(kInvokeTgt));
        }
        break;
      default:
        return -1;
    }
  }
  return state + 1;
}

NextCallInsn Arm64Mir2Lir::GetNextSDCallInsn() {
  return Arm64NextSDCallInsn;
}

LIR* Arm64Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
  // For ARM64, just generate a relative BL instruction that will be filled in at 'link time'.
  // If the target turns out to be too far, the linker will generate a thunk for dispatch.
  int target_method_idx = target_method.dex_method_index;
  const DexFile* target_dex_file = target_method.dex_file;

  // Generate the call instruction and save index, dex_file, and type.
  // NOTE: Method deduplication takes linker patches into account, so we can just pass 0
  // as a placeholder for the offset.
  LIR* call = RawLIR(current_dalvik_offset_, kA64Bl1t, 0,
                     target_method_idx, WrapPointer(target_dex_file), type);
  AppendLIR(call);
  call_method_insns_.push_back(call);
  return call;
}

LIR* Arm64Mir2Lir::GenCallInsn(const MirMethodLoweringInfo& method_info) {
  LIR* call_insn;
  if (method_info.FastPath() && Arm64UseRelativeCall(cu_, method_info.GetTargetMethod()) &&
      (method_info.GetSharpType() == kDirect || method_info.GetSharpType() == kStatic) &&
      method_info.DirectCode() == static_cast<uintptr_t>(-1)) {
    call_insn = CallWithLinkerFixup(method_info.GetTargetMethod(), method_info.GetSharpType());
  } else {
    call_insn = OpReg(kOpBlx, TargetPtrReg(kInvokeTgt));
  }
  return call_insn;
}

}  // namespace art