1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_arm64.h"
18
19 #include "arch/arm64/asm_support_arm64.h"
20 #include "arch/arm64/instruction_set_features_arm64.h"
21 #include "art_method.h"
22 #include "code_generator_utils.h"
23 #include "compiled_method.h"
24 #include "entrypoints/quick/quick_entrypoints.h"
25 #include "entrypoints/quick/quick_entrypoints_enum.h"
26 #include "gc/accounting/card_table.h"
27 #include "intrinsics.h"
28 #include "intrinsics_arm64.h"
29 #include "linker/arm64/relative_patcher_arm64.h"
30 #include "mirror/array-inl.h"
31 #include "mirror/class-inl.h"
32 #include "offsets.h"
33 #include "thread.h"
34 #include "utils/arm64/assembler_arm64.h"
35 #include "utils/assembler.h"
36 #include "utils/stack_checks.h"
37
38 using namespace vixl::aarch64; // NOLINT(build/namespaces)
39 using vixl::ExactAssemblyScope;
40 using vixl::CodeBufferCheckScope;
41 using vixl::EmissionCheckScope;
42
43 #ifdef __
44 #error "ARM64 Codegen VIXL macro-assembler macro already defined."
45 #endif
46
47 namespace art {
48
49 template<class MirrorType>
50 class GcRoot;
51
52 namespace arm64 {
53
54 using helpers::ARM64EncodableConstantOrRegister;
55 using helpers::ArtVixlRegCodeCoherentForRegSet;
56 using helpers::CPURegisterFrom;
57 using helpers::DRegisterFrom;
58 using helpers::FPRegisterFrom;
59 using helpers::HeapOperand;
60 using helpers::HeapOperandFrom;
61 using helpers::InputCPURegisterAt;
62 using helpers::InputCPURegisterOrZeroRegAt;
63 using helpers::InputFPRegisterAt;
64 using helpers::InputOperandAt;
65 using helpers::InputRegisterAt;
66 using helpers::Int64ConstantFrom;
67 using helpers::IsConstantZeroBitPattern;
68 using helpers::LocationFrom;
69 using helpers::OperandFromMemOperand;
70 using helpers::OutputCPURegister;
71 using helpers::OutputFPRegister;
72 using helpers::OutputRegister;
73 using helpers::QRegisterFrom;
74 using helpers::RegisterFrom;
75 using helpers::StackOperandFrom;
76 using helpers::VIXLRegCodeFromART;
77 using helpers::WRegisterFrom;
78 using helpers::XRegisterFrom;
79
80 static constexpr int kCurrentMethodStackOffset = 0;
81 // The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the
82 // jump table version generates 7 instructions and num_entries literals. The compare/jump sequence
83 // generates less code/data for a small num_entries.
84 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
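// A rough sketch of the arithmetic behind the threshold, assuming the hypothetical value
// num_entries = 7: the compare/jump sequence costs about 1.5 * 7 + 3 ~= 14 instructions
// (~56 bytes), while the jump table costs 7 instructions plus 7 int32 literals (also ~56 bytes),
// so 7 is approximately the break-even point and larger switches favor the table.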
85
86 // A reference load (except object array loads) uses LDR Wt, [Xn, #offset], which can handle
87 // offset < 16KiB. For offsets >= 16KiB, the load must be emitted as two or more instructions.
88 // For the Baker read barrier implementation using link-time generated thunks we need to split
89 // the offset explicitly.
90 constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
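// A minimal sketch of such a split, assuming a spare temp register is available (the exact
// emission lives in the field/array load helpers; the names here are illustrative only):
//
//   Add(temp, obj, offset & ~(kReferenceLoadMinFarOffset - 1));  // Aligned high part of offset.
//   Ldr(dst, MemOperand(temp, offset & (kReferenceLoadMinFarOffset - 1)));  // Low part < 16KiB.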
91
92 // Flags controlling the use of link-time generated thunks for Baker read barriers.
93 constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
94 constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
95
96 // Some instructions have special requirements for a temporary. For example,
97 // LoadClass/kBssEntry and LoadString/kBssEntry with the Baker read barrier require
98 // a temp that is not R0 (to avoid an extra move), and Baker read barrier field
99 // loads with large offsets need a fixed register to limit the number of link-time
100 // thunks we generate. For these and similar cases, we want to reserve a specific
101 // register that's neither callee-save nor an argument register. We choose x15.
102 inline Location FixedTempLocation() {
103 return Location::RegisterLocation(x15.GetCode());
104 }
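// A hedged usage note: code that needs this reserved register is expected to request it
// explicitly when building its LocationSummary, typically via
// locations->AddTemp(FixedTempLocation()).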
105
106 inline Condition ARM64Condition(IfCondition cond) {
107 switch (cond) {
108 case kCondEQ: return eq;
109 case kCondNE: return ne;
110 case kCondLT: return lt;
111 case kCondLE: return le;
112 case kCondGT: return gt;
113 case kCondGE: return ge;
114 case kCondB: return lo;
115 case kCondBE: return ls;
116 case kCondA: return hi;
117 case kCondAE: return hs;
118 }
119 LOG(FATAL) << "Unreachable";
120 UNREACHABLE();
121 }
122
123 inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
124 // The ARM64 condition codes can express all the necessary branches, see the
125 // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
126 // There is no dex instruction or HIR that would need the missing conditions
127 // "equal or unordered" or "not equal".
128 switch (cond) {
129 case kCondEQ: return eq;
130 case kCondNE: return ne /* unordered */;
131 case kCondLT: return gt_bias ? cc : lt /* unordered */;
132 case kCondLE: return gt_bias ? ls : le /* unordered */;
133 case kCondGT: return gt_bias ? hi /* unordered */ : gt;
134 case kCondGE: return gt_bias ? cs /* unordered */ : ge;
135 default:
136 LOG(FATAL) << "UNREACHABLE";
137 UNREACHABLE();
138 }
139 }
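// A worked example of the bias handling above (informal): for kCondLT with gt_bias, a NaN operand
// must make the test fail, so we use cc/lo, which reads the C flag and is false when FCMP reports
// "unordered" (NZCV = 0011); without gt_bias we use lt, which is true in the unordered case and
// therefore treats NaN as "less than".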
140
141 Location ARM64ReturnLocation(Primitive::Type return_type) {
142 // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
143 // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
144 // but we use the exact registers for clarity.
145 if (return_type == Primitive::kPrimFloat) {
146 return LocationFrom(s0);
147 } else if (return_type == Primitive::kPrimDouble) {
148 return LocationFrom(d0);
149 } else if (return_type == Primitive::kPrimLong) {
150 return LocationFrom(x0);
151 } else if (return_type == Primitive::kPrimVoid) {
152 return Location::NoLocation();
153 } else {
154 return LocationFrom(w0);
155 }
156 }
157
158 Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type return_type) {
159 return ARM64ReturnLocation(return_type);
160 }
161
162 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
163 #define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT
164 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
165
166 // Calculate memory accessing operand for save/restore live registers.
167 static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
168 LocationSummary* locations,
169 int64_t spill_offset,
170 bool is_save) {
171 const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
172 const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
173 DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills,
174 codegen->GetNumberOfCoreRegisters(),
175 fp_spills,
176 codegen->GetNumberOfFloatingPointRegisters()));
177
178 CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
179 unsigned v_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSize : kDRegSize;
180 CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size, fp_spills);
181
182 MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
183 UseScratchRegisterScope temps(masm);
184
185 Register base = masm->StackPointer();
186 int64_t core_spill_size = core_list.GetTotalSizeInBytes();
187 int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
188 int64_t reg_size = kXRegSizeInBytes;
189 int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
190 uint32_t ls_access_size = WhichPowerOf2(reg_size);
191 if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
192 !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
193 // If the offset does not fit in the instruction's immediate field, use an alternate register
194 // to compute the base address (the floating-point registers' spill base address).
195 Register new_base = temps.AcquireSameSizeAs(base);
196 __ Add(new_base, base, Operand(spill_offset + core_spill_size));
197 base = new_base;
198 spill_offset = -core_spill_size;
199 int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
200 DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
201 DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
202 }
203
204 if (is_save) {
205 __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
206 __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
207 } else {
208 __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
209 __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
210 }
211 }
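// An informal sketch of the frame slice addressed by the helper above (offsets relative to the
// chosen base register, initially SP):
//
//   [base + spill_offset]                    : core (X) register spills, pair-stored.
//   [base + spill_offset + core_spill_size]  : FP (D, or Q with SIMD) register spills.
//
// If the largest pair offset does not fit the LDP/STP immediate, the base is rebased to
// SP + spill_offset + core_spill_size so that both ranges use small encodable offsets.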
212
213 void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
214 size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
215 const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
216 for (uint32_t i : LowToHighBits(core_spills)) {
217 // If the register holds an object, update the stack mask.
218 if (locations->RegisterContainsObject(i)) {
219 locations->SetStackBit(stack_offset / kVRegSize);
220 }
221 DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
222 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
223 saved_core_stack_offsets_[i] = stack_offset;
224 stack_offset += kXRegSizeInBytes;
225 }
226
227 const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
228 for (uint32_t i : LowToHighBits(fp_spills)) {
229 DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
230 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
231 saved_fpu_stack_offsets_[i] = stack_offset;
232 stack_offset += kDRegSizeInBytes;
233 }
234
235 SaveRestoreLiveRegistersHelper(codegen,
236 locations,
237 codegen->GetFirstRegisterSlotInSlowPath(), true /* is_save */);
238 }
239
240 void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
241 SaveRestoreLiveRegistersHelper(codegen,
242 locations,
243 codegen->GetFirstRegisterSlotInSlowPath(), false /* is_save */);
244 }
245
246 class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
247 public:
248 explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}
249
250 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
251 LocationSummary* locations = instruction_->GetLocations();
252 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
253
254 __ Bind(GetEntryLabel());
255 if (instruction_->CanThrowIntoCatchBlock()) {
256 // Live registers will be restored in the catch block if caught.
257 SaveLiveRegisters(codegen, instruction_->GetLocations());
258 }
259 // We're moving two locations to locations that could overlap, so we need a parallel
260 // move resolver.
261 InvokeRuntimeCallingConvention calling_convention;
262 codegen->EmitParallelMoves(
263 locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt,
264 locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt);
265 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
266 ? kQuickThrowStringBounds
267 : kQuickThrowArrayBounds;
268 arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
269 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
270 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
271 }
272
273 bool IsFatal() const OVERRIDE { return true; }
274
275 const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; }
276
277 private:
278 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
279 };
280
281 class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
282 public:
283 explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}
284
285 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
286 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
287 __ Bind(GetEntryLabel());
288 arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
289 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
290 }
291
292 bool IsFatal() const OVERRIDE { return true; }
293
294 const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; }
295
296 private:
297 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
298 };
299
300 class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
301 public:
302 LoadClassSlowPathARM64(HLoadClass* cls,
303 HInstruction* at,
304 uint32_t dex_pc,
305 bool do_clinit,
306 vixl::aarch64::Register bss_entry_temp = vixl::aarch64::Register(),
307 vixl::aarch64::Label* bss_entry_adrp_label = nullptr)
308 : SlowPathCodeARM64(at),
309 cls_(cls),
310 dex_pc_(dex_pc),
311 do_clinit_(do_clinit),
312 bss_entry_temp_(bss_entry_temp),
313 bss_entry_adrp_label_(bss_entry_adrp_label) {
314 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
315 }
316
317 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
318 LocationSummary* locations = instruction_->GetLocations();
319 Location out = locations->Out();
320 constexpr bool call_saves_everything_except_r0_ip0 = (!kUseReadBarrier || kUseBakerReadBarrier);
321 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
322
323 InvokeRuntimeCallingConvention calling_convention;
324 // For HLoadClass/kBssEntry/kSaveEverything, the page address of the entry is in a temp
325 // register, make sure it's not clobbered by the call or by saving/restoring registers.
326 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
327 bool is_load_class_bss_entry =
328 (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry);
329 if (is_load_class_bss_entry) {
330 DCHECK(bss_entry_temp_.IsValid());
331 DCHECK(!bss_entry_temp_.Is(calling_convention.GetRegisterAt(0)));
332 DCHECK(
333 !UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(bss_entry_temp_));
334 }
335
336 __ Bind(GetEntryLabel());
337 SaveLiveRegisters(codegen, locations);
338
339 dex::TypeIndex type_index = cls_->GetTypeIndex();
340 __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
341 QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
342 : kQuickInitializeType;
343 arm64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
344 if (do_clinit_) {
345 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
346 } else {
347 CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
348 }
349
350 // Move the class to the desired location.
351 if (out.IsValid()) {
352 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
353 Primitive::Type type = instruction_->GetType();
354 arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
355 }
356 RestoreLiveRegisters(codegen, locations);
357 // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
358 if (is_load_class_bss_entry) {
359 DCHECK(out.IsValid());
360 const DexFile& dex_file = cls_->GetDexFile();
361 if (call_saves_everything_except_r0_ip0) {
362 // The class entry page address was preserved in bss_entry_temp_ thanks to kSaveEverything.
363 } else {
364 // For non-Baker read barrier, we need to re-calculate the address of the class entry page.
365 bss_entry_adrp_label_ = arm64_codegen->NewBssEntryTypePatch(dex_file, type_index);
366 arm64_codegen->EmitAdrpPlaceholder(bss_entry_adrp_label_, bss_entry_temp_);
367 }
368 vixl::aarch64::Label* strp_label =
369 arm64_codegen->NewBssEntryTypePatch(dex_file, type_index, bss_entry_adrp_label_);
370 {
371 SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
372 __ Bind(strp_label);
373 __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot),
374 MemOperand(bss_entry_temp_, /* offset placeholder */ 0));
375 }
376 }
377 __ B(GetExitLabel());
378 }
379
380 const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARM64"; }
381
382 private:
383 // The class this slow path will load.
384 HLoadClass* const cls_;
385
386 // The dex PC of `at_`.
387 const uint32_t dex_pc_;
388
389 // Whether to initialize the class.
390 const bool do_clinit_;
391
392 // For HLoadClass/kBssEntry, the temp register and the label of the ADRP where it was loaded.
393 vixl::aarch64::Register bss_entry_temp_;
394 vixl::aarch64::Label* bss_entry_adrp_label_;
395
396 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
397 };
398
399 class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
400 public:
401 LoadStringSlowPathARM64(HLoadString* instruction, Register temp, vixl::aarch64::Label* adrp_label)
402 : SlowPathCodeARM64(instruction),
403 temp_(temp),
404 adrp_label_(adrp_label) {}
405
406 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
407 LocationSummary* locations = instruction_->GetLocations();
408 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
409 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
410
411 InvokeRuntimeCallingConvention calling_convention;
412 // Make sure `temp_` is not clobbered by the call or by saving/restoring registers.
413 DCHECK(temp_.IsValid());
414 DCHECK(!temp_.Is(calling_convention.GetRegisterAt(0)));
415 DCHECK(!UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(temp_));
416
417 __ Bind(GetEntryLabel());
418 SaveLiveRegisters(codegen, locations);
419
420 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
421 __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
422 arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
423 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
424 Primitive::Type type = instruction_->GetType();
425 arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
426
427 RestoreLiveRegisters(codegen, locations);
428
429 // Store the resolved String to the BSS entry.
430 const DexFile& dex_file = instruction_->AsLoadString()->GetDexFile();
431 if (!kUseReadBarrier || kUseBakerReadBarrier) {
432 // The string entry page address was preserved in temp_ thanks to kSaveEverything.
433 } else {
434 // For non-Baker read barrier, we need to re-calculate the address of the string entry page.
435 adrp_label_ = arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index);
436 arm64_codegen->EmitAdrpPlaceholder(adrp_label_, temp_);
437 }
438 vixl::aarch64::Label* strp_label =
439 arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label_);
440 {
441 SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
442 __ Bind(strp_label);
443 __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot),
444 MemOperand(temp_, /* offset placeholder */ 0));
445 }
446
447 __ B(GetExitLabel());
448 }
449
450 const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; }
451
452 private:
453 const Register temp_;
454 vixl::aarch64::Label* adrp_label_;
455
456 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
457 };
458
459 class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
460 public:
461 explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}
462
463 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
464 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
465 __ Bind(GetEntryLabel());
466 if (instruction_->CanThrowIntoCatchBlock()) {
467 // Live registers will be restored in the catch block if caught.
468 SaveLiveRegisters(codegen, instruction_->GetLocations());
469 }
470 arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
471 instruction_,
472 instruction_->GetDexPc(),
473 this);
474 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
475 }
476
477 bool IsFatal() const OVERRIDE { return true; }
478
479 const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; }
480
481 private:
482 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
483 };
484
485 class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
486 public:
487 SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
488 : SlowPathCodeARM64(instruction), successor_(successor) {}
489
490 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
491 LocationSummary* locations = instruction_->GetLocations();
492 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
493 __ Bind(GetEntryLabel());
494 SaveLiveRegisters(codegen, locations); // Only saves live 128-bit regs for SIMD.
495 arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
496 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
497 RestoreLiveRegisters(codegen, locations); // Only restores live 128-bit regs for SIMD.
498 if (successor_ == nullptr) {
499 __ B(GetReturnLabel());
500 } else {
501 __ B(arm64_codegen->GetLabelOf(successor_));
502 }
503 }
504
505 vixl::aarch64::Label* GetReturnLabel() {
506 DCHECK(successor_ == nullptr);
507 return &return_label_;
508 }
509
510 HBasicBlock* GetSuccessor() const {
511 return successor_;
512 }
513
514 const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM64"; }
515
516 private:
517 // If not null, the block to branch to after the suspend check.
518 HBasicBlock* const successor_;
519
520 // If `successor_` is null, the label to branch to after the suspend check.
521 vixl::aarch64::Label return_label_;
522
523 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
524 };
525
526 class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
527 public:
528 TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
529 : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}
530
531 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
532 LocationSummary* locations = instruction_->GetLocations();
533
534 DCHECK(instruction_->IsCheckCast()
535 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
536 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
537 uint32_t dex_pc = instruction_->GetDexPc();
538
539 __ Bind(GetEntryLabel());
540
541 if (!is_fatal_) {
542 SaveLiveRegisters(codegen, locations);
543 }
544
545 // We're moving two locations to locations that could overlap, so we need a parallel
546 // move resolver.
547 InvokeRuntimeCallingConvention calling_convention;
548 codegen->EmitParallelMoves(locations->InAt(0),
549 LocationFrom(calling_convention.GetRegisterAt(0)),
550 Primitive::kPrimNot,
551 locations->InAt(1),
552 LocationFrom(calling_convention.GetRegisterAt(1)),
553 Primitive::kPrimNot);
554 if (instruction_->IsInstanceOf()) {
555 arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
556 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
557 Primitive::Type ret_type = instruction_->GetType();
558 Location ret_loc = calling_convention.GetReturnLocation(ret_type);
559 arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
560 } else {
561 DCHECK(instruction_->IsCheckCast());
562 arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
563 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
564 }
565
566 if (!is_fatal_) {
567 RestoreLiveRegisters(codegen, locations);
568 __ B(GetExitLabel());
569 }
570 }
571
572 const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM64"; }
573 bool IsFatal() const OVERRIDE { return is_fatal_; }
574
575 private:
576 const bool is_fatal_;
577
578 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
579 };
580
581 class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
582 public:
583 explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
584 : SlowPathCodeARM64(instruction) {}
585
586 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
587 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
588 __ Bind(GetEntryLabel());
589 LocationSummary* locations = instruction_->GetLocations();
590 SaveLiveRegisters(codegen, locations);
591 InvokeRuntimeCallingConvention calling_convention;
592 __ Mov(calling_convention.GetRegisterAt(0),
593 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
594 arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
595 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
596 }
597
598 const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
599
600 private:
601 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
602 };
603
604 class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
605 public:
606 explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}
607
608 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
609 LocationSummary* locations = instruction_->GetLocations();
610 __ Bind(GetEntryLabel());
611 SaveLiveRegisters(codegen, locations);
612
613 InvokeRuntimeCallingConvention calling_convention;
614 HParallelMove parallel_move(codegen->GetGraph()->GetArena());
615 parallel_move.AddMove(
616 locations->InAt(0),
617 LocationFrom(calling_convention.GetRegisterAt(0)),
618 Primitive::kPrimNot,
619 nullptr);
620 parallel_move.AddMove(
621 locations->InAt(1),
622 LocationFrom(calling_convention.GetRegisterAt(1)),
623 Primitive::kPrimInt,
624 nullptr);
625 parallel_move.AddMove(
626 locations->InAt(2),
627 LocationFrom(calling_convention.GetRegisterAt(2)),
628 Primitive::kPrimNot,
629 nullptr);
630 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
631
632 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
633 arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
634 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
635 RestoreLiveRegisters(codegen, locations);
636 __ B(GetExitLabel());
637 }
638
639 const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; }
640
641 private:
642 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
643 };
644
645 void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
646 uint32_t num_entries = switch_instr_->GetNumEntries();
647 DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
648
649 // We are about to use the assembler to place literals directly. Make sure we have enough
650 // underlying code buffer space and that the jump table is generated with the right size.
651 EmissionCheckScope scope(codegen->GetVIXLAssembler(),
652 num_entries * sizeof(int32_t),
653 CodeBufferCheckScope::kExactSize);
654
655 __ Bind(&table_start_);
656 const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
657 for (uint32_t i = 0; i < num_entries; i++) {
658 vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
659 DCHECK(target_label->IsBound());
660 ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
661 DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
662 DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
663 Literal<int32_t> literal(jump_offset);
664 __ place(&literal);
665 }
666 }
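// For context, a rough sketch of how this table is consumed (the actual emission lives in the
// PackedSwitch visitor, not here); the int32 entries placed above are offsets relative to
// table_start_, so the dispatch looks roughly like:
//
//   Adr(table_base, &table_start_);                              // PC-relative table address.
//   Ldr(jump_offset_w, MemOperand(table_base, index, UXTW, 2));  // Load the int32 entry.
//   Add(target, table_base, Operand(jump_offset_w, SXTW));       // table_start_ + offset.
//   Br(target);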
667
668 // Abstract base class for read barrier slow paths marking a reference
669 // `ref`.
670 //
671 // Argument `entrypoint` must be a register location holding the read
672 // barrier marking runtime entry point to be invoked.
673 class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 {
674 protected:
675 ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint)
676 : SlowPathCodeARM64(instruction), ref_(ref), entrypoint_(entrypoint) {
677 DCHECK(kEmitCompilerReadBarrier);
678 }
679
680 const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM64"; }
681
682 // Generate assembly code calling the read barrier marking runtime
683 // entry point (ReadBarrierMarkRegX).
684 void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
685 // No need to save live registers; it's taken care of by the
686 // entrypoint. Also, there is no need to update the stack mask,
687 // as this runtime call will not trigger a garbage collection.
688 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
689 DCHECK_NE(ref_.reg(), LR);
690 DCHECK_NE(ref_.reg(), WSP);
691 DCHECK_NE(ref_.reg(), WZR);
692 // IP0 is used internally by the ReadBarrierMarkRegX entry point
693 // as a temporary, it cannot be the entry point's input/output.
694 DCHECK_NE(ref_.reg(), IP0);
695 DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
696 // "Compact" slow path, saving two moves.
697 //
698 // Instead of using the standard runtime calling convention (input
699 // and output in W0):
700 //
701 // W0 <- ref
702 // W0 <- ReadBarrierMark(W0)
703 // ref <- W0
704 //
705 // we just use rX (the register containing `ref`) as input and output
706 // of a dedicated entrypoint:
707 //
708 // rX <- ReadBarrierMarkRegX(rX)
709 //
710 if (entrypoint_.IsValid()) {
711 arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
712 __ Blr(XRegisterFrom(entrypoint_));
713 } else {
714 // Entrypoint is not already loaded, load from the thread.
715 int32_t entry_point_offset =
716 CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
717 // This runtime call does not require a stack map.
718 arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
719 }
720 }
721
722 // The location (register) of the marked object reference.
723 const Location ref_;
724
725 // The location of the entrypoint if it is already loaded.
726 const Location entrypoint_;
727
728 private:
729 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64);
730 };
731
732 // Slow path marking an object reference `ref` during a read
733 // barrier. The field `obj.field` in the object `obj` holding this
734 // reference does not get updated by this slow path after marking.
735 //
736 // This means that after the execution of this slow path, `ref` will
737 // always be up-to-date, but `obj.field` may not; i.e., after the
738 // flip, `ref` will be a to-space reference, but `obj.field` will
739 // probably still be a from-space reference (unless it gets updated by
740 // another thread, or if another thread installed another object
741 // reference (different from `ref`) in `obj.field`).
742 //
743 // If `entrypoint` is a valid location it is assumed to already be
744 // holding the entrypoint. The case where the entrypoint is passed in
745 // is when the decision to mark is based on whether the GC is marking.
746 class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
747 public:
748 ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
749 Location ref,
750 Location entrypoint = Location::NoLocation())
751 : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) {
752 DCHECK(kEmitCompilerReadBarrier);
753 }
754
755 const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }
756
757 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
758 LocationSummary* locations = instruction_->GetLocations();
759 DCHECK(locations->CanCall());
760 DCHECK(ref_.IsRegister()) << ref_;
761 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
762 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
763 << "Unexpected instruction in read barrier marking slow path: "
764 << instruction_->DebugName();
765
766 __ Bind(GetEntryLabel());
767 GenerateReadBarrierMarkRuntimeCall(codegen);
768 __ B(GetExitLabel());
769 }
770
771 private:
772 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
773 };
774
775 // Slow path loading `obj`'s lock word, loading a reference from
776 // object `*(obj + offset + (index << scale_factor))` into `ref`, and
777 // marking `ref` if `obj` is gray according to the lock word (Baker
778 // read barrier). The field `obj.field` in the object `obj` holding
779 // this reference does not get updated by this slow path after marking
780 // (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
781 // below for that).
782 //
783 // This means that after the execution of this slow path, `ref` will
784 // always be up-to-date, but `obj.field` may not; i.e., after the
785 // flip, `ref` will be a to-space reference, but `obj.field` will
786 // probably still be a from-space reference (unless it gets updated by
787 // another thread, or if another thread installed another object
788 // reference (different from `ref`) in `obj.field`).
789 //
790 // Argument `entrypoint` must be a register location holding the read
791 // barrier marking runtime entry point to be invoked.
792 class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
793 public:
794 LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction,
795 Location ref,
796 Register obj,
797 uint32_t offset,
798 Location index,
799 size_t scale_factor,
800 bool needs_null_check,
801 bool use_load_acquire,
802 Register temp,
803 Location entrypoint)
804 : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
805 obj_(obj),
806 offset_(offset),
807 index_(index),
808 scale_factor_(scale_factor),
809 needs_null_check_(needs_null_check),
810 use_load_acquire_(use_load_acquire),
811 temp_(temp) {
812 DCHECK(kEmitCompilerReadBarrier);
813 DCHECK(kUseBakerReadBarrier);
814 }
815
816 const char* GetDescription() const OVERRIDE {
817 return "LoadReferenceWithBakerReadBarrierSlowPathARM64";
818 }
819
820 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
821 LocationSummary* locations = instruction_->GetLocations();
822 DCHECK(locations->CanCall());
823 DCHECK(ref_.IsRegister()) << ref_;
824 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
825 DCHECK(obj_.IsW());
826 DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
827 DCHECK(instruction_->IsInstanceFieldGet() ||
828 instruction_->IsStaticFieldGet() ||
829 instruction_->IsArrayGet() ||
830 instruction_->IsArraySet() ||
831 instruction_->IsInstanceOf() ||
832 instruction_->IsCheckCast() ||
833 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
834 (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
835 << "Unexpected instruction in read barrier marking slow path: "
836 << instruction_->DebugName();
837 // The read barrier instrumentation of object ArrayGet
838 // instructions does not support the HIntermediateAddress
839 // instruction.
840 DCHECK(!(instruction_->IsArrayGet() &&
841 instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
842
843 // Temporary register `temp_`, used to store the lock word, must
844 // not be IP0 nor IP1, as we may use them to emit the reference
845 // load (in the call to GenerateRawReferenceLoad below), and we
846 // need the lock word to still be in `temp_` after the reference
847 // load.
848 DCHECK_NE(LocationFrom(temp_).reg(), IP0);
849 DCHECK_NE(LocationFrom(temp_).reg(), IP1);
850
851 __ Bind(GetEntryLabel());
852
853 // When using MaybeGenerateReadBarrierSlow, the read barrier call is
854 // inserted after the original load. However, in fast path based
855 // Baker's read barriers, we need to perform the load of
856 // mirror::Object::monitor_ *before* the original reference load.
857 // This load-load ordering is required by the read barrier.
858 // The fast path/slow path (for Baker's algorithm) should look like:
859 //
860 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
861 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
862 // HeapReference<mirror::Object> ref = *src; // Original reference load.
863 // bool is_gray = (rb_state == ReadBarrier::GrayState());
864 // if (is_gray) {
865 // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
866 // }
867 //
868 // Note: the original implementation in ReadBarrier::Barrier is
869 // slightly more complex as it performs additional checks that we do
870 // not do here for performance reasons.
871
872 // /* int32_t */ monitor = obj->monitor_
873 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
874 __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
875 if (needs_null_check_) {
876 codegen->MaybeRecordImplicitNullCheck(instruction_);
877 }
878 // /* LockWord */ lock_word = LockWord(monitor)
879 static_assert(sizeof(LockWord) == sizeof(int32_t),
880 "art::LockWord and int32_t have different sizes.");
881
882 // Introduce a dependency on the lock_word including rb_state,
883 // to prevent load-load reordering, and without using
884 // a memory barrier (which would be more expensive).
885 // `obj` is unchanged by this operation, but its value now depends
886 // on `temp`.
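// (The LDR into the W-sized `temp_` above zero-extends the lock word, so `temp_.X() LSR #32` is
// always zero and the ADD below leaves `obj_` unchanged while still creating an address
// dependency on the lock word load.)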
887 __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
888
889 // The actual reference load.
890 // A possible implicit null check has already been handled above.
891 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
892 arm64_codegen->GenerateRawReferenceLoad(instruction_,
893 ref_,
894 obj_,
895 offset_,
896 index_,
897 scale_factor_,
898 /* needs_null_check */ false,
899 use_load_acquire_);
900
901 // Mark the object `ref` when `obj` is gray.
902 //
903 // if (rb_state == ReadBarrier::GrayState())
904 // ref = ReadBarrier::Mark(ref);
905 //
906 // Given the numeric representation, it's enough to check the low bit of the rb_state.
907 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
908 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
909 __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
910 GenerateReadBarrierMarkRuntimeCall(codegen);
911
912 __ B(GetExitLabel());
913 }
914
915 private:
916 // The register containing the object holding the marked object reference field.
917 Register obj_;
918 // The offset, index and scale factor to access the reference in `obj_`.
919 uint32_t offset_;
920 Location index_;
921 size_t scale_factor_;
922 // Is a null check required?
923 bool needs_null_check_;
924 // Should this reference load use Load-Acquire semantics?
925 bool use_load_acquire_;
926 // A temporary register used to hold the lock word of `obj_`.
927 Register temp_;
928
929 DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64);
930 };
931
932 // Slow path loading `obj`'s lock word, loading a reference from
933 // object `*(obj + offset + (index << scale_factor))` into `ref`, and
934 // marking `ref` if `obj` is gray according to the lock word (Baker
935 // read barrier). If needed, this slow path also atomically updates
936 // the field `obj.field` in the object `obj` holding this reference
937 // after marking (contrary to
938 // LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never
939 // tries to update `obj.field`).
940 //
941 // This means that after the execution of this slow path, both `ref`
942 // and `obj.field` will be up-to-date; i.e., after the flip, both will
943 // hold the same to-space reference (unless another thread installed
944 // another object reference (different from `ref`) in `obj.field`).
945 //
946 // Argument `entrypoint` must be a register location holding the read
947 // barrier marking runtime entry point to be invoked.
948 class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
949 : public ReadBarrierMarkSlowPathBaseARM64 {
950 public:
951 LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(HInstruction* instruction,
952 Location ref,
953 Register obj,
954 uint32_t offset,
955 Location index,
956 size_t scale_factor,
957 bool needs_null_check,
958 bool use_load_acquire,
959 Register temp,
960 Location entrypoint)
961 : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
962 obj_(obj),
963 offset_(offset),
964 index_(index),
965 scale_factor_(scale_factor),
966 needs_null_check_(needs_null_check),
967 use_load_acquire_(use_load_acquire),
968 temp_(temp) {
969 DCHECK(kEmitCompilerReadBarrier);
970 DCHECK(kUseBakerReadBarrier);
971 }
972
973 const char* GetDescription() const OVERRIDE {
974 return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64";
975 }
976
977 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
978 LocationSummary* locations = instruction_->GetLocations();
979 Register ref_reg = WRegisterFrom(ref_);
980 DCHECK(locations->CanCall());
981 DCHECK(ref_.IsRegister()) << ref_;
982 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
983 DCHECK(obj_.IsW());
984 DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
985
986 // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
987 DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
988 << "Unexpected instruction in read barrier marking and field updating slow path: "
989 << instruction_->DebugName();
990 DCHECK(instruction_->GetLocations()->Intrinsified());
991 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
992 DCHECK_EQ(offset_, 0u);
993 DCHECK_EQ(scale_factor_, 0u);
994 DCHECK_EQ(use_load_acquire_, false);
995 // The location of the offset of the marked reference field within `obj_`.
996 Location field_offset = index_;
997 DCHECK(field_offset.IsRegister()) << field_offset;
998
999 // Temporary register `temp_`, used to store the lock word, must
1000 // not be IP0 nor IP1, as we may use them to emit the reference
1001 // load (in the call to GenerateRawReferenceLoad below), and we
1002 // need the lock word to still be in `temp_` after the reference
1003 // load.
1004 DCHECK_NE(LocationFrom(temp_).reg(), IP0);
1005 DCHECK_NE(LocationFrom(temp_).reg(), IP1);
1006
1007 __ Bind(GetEntryLabel());
1008
1009 // /* int32_t */ monitor = obj->monitor_
1010 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1011 __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
1012 if (needs_null_check_) {
1013 codegen->MaybeRecordImplicitNullCheck(instruction_);
1014 }
1015 // /* LockWord */ lock_word = LockWord(monitor)
1016 static_assert(sizeof(LockWord) == sizeof(int32_t),
1017 "art::LockWord and int32_t have different sizes.");
1018
1019 // Introduce a dependency on the lock_word including rb_state,
1020 // to prevent load-load reordering, and without using
1021 // a memory barrier (which would be more expensive).
1022 // `obj` is unchanged by this operation, but its value now depends
1023 // on `temp`.
1024 __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
1025
1026 // The actual reference load.
1027 // A possible implicit null check has already been handled above.
1028 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1029 arm64_codegen->GenerateRawReferenceLoad(instruction_,
1030 ref_,
1031 obj_,
1032 offset_,
1033 index_,
1034 scale_factor_,
1035 /* needs_null_check */ false,
1036 use_load_acquire_);
1037
1038 // Mark the object `ref` when `obj` is gray.
1039 //
1040 // if (rb_state == ReadBarrier::GrayState())
1041 // ref = ReadBarrier::Mark(ref);
1042 //
1043 // Given the numeric representation, it's enough to check the low bit of the rb_state.
1044 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
1045 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1046 __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
1047
1048 // Save the old value of the reference before marking it.
1049 // Note that we cannot use IP to save the old reference, as IP is
1050 // used internally by the ReadBarrierMarkRegX entry point, and we
1051 // need the old reference after the call to that entry point.
1052 DCHECK_NE(LocationFrom(temp_).reg(), IP0);
1053 __ Mov(temp_.W(), ref_reg);
1054
1055 GenerateReadBarrierMarkRuntimeCall(codegen);
1056
1057 // If the new reference is different from the old reference,
1058 // update the field in the holder (`*(obj_ + field_offset)`).
1059 //
1060 // Note that this field could also hold a different object, if
1061 // another thread had concurrently changed it. In that case, the
1062 // LDXR/CMP/BNE sequence of instructions in the compare-and-set
1063 // (CAS) operation below would abort the CAS, leaving the field
1064 // as-is.
1065 __ Cmp(temp_.W(), ref_reg);
1066 __ B(eq, GetExitLabel());
1067
1068 // Update the holder's field atomically. This may fail if
1069 // the mutator updates it before us, but that is OK. This is achieved
1070 // using a strong compare-and-set (CAS) operation with relaxed
1071 // memory synchronization ordering, where the expected value is
1072 // the old reference and the desired value is the new reference.
1073
1074 MacroAssembler* masm = arm64_codegen->GetVIXLAssembler();
1075 UseScratchRegisterScope temps(masm);
1076
1077 // Convenience aliases.
1078 Register base = obj_.W();
1079 Register offset = XRegisterFrom(field_offset);
1080 Register expected = temp_.W();
1081 Register value = ref_reg;
1082 Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory.
1083 Register tmp_value = temps.AcquireW(); // Value in memory.
1084
1085 __ Add(tmp_ptr, base.X(), Operand(offset));
1086
1087 if (kPoisonHeapReferences) {
1088 arm64_codegen->GetAssembler()->PoisonHeapReference(expected);
1089 if (value.Is(expected)) {
1090 // Do not poison `value`, as it is the same register as
1091 // `expected`, which has just been poisoned.
1092 } else {
1093 arm64_codegen->GetAssembler()->PoisonHeapReference(value);
1094 }
1095 }
1096
1097 // do {
1098 // tmp_value = [tmp_ptr] - expected;
1099 // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
1100
1101 vixl::aarch64::Label loop_head, comparison_failed, exit_loop;
1102 __ Bind(&loop_head);
1103 __ Ldxr(tmp_value, MemOperand(tmp_ptr));
1104 __ Cmp(tmp_value, expected);
1105 __ B(&comparison_failed, ne);
1106 __ Stxr(tmp_value, value, MemOperand(tmp_ptr));
1107 __ Cbnz(tmp_value, &loop_head);
1108 __ B(&exit_loop);
1109 __ Bind(&comparison_failed);
1110 __ Clrex();
1111 __ Bind(&exit_loop);
1112
1113 if (kPoisonHeapReferences) {
1114 arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected);
1115 if (value.Is(expected)) {
1116 // Do not unpoison `value`, as it is the same register as
1117 // `expected`, which has just been unpoisoned.
1118 } else {
1119 arm64_codegen->GetAssembler()->UnpoisonHeapReference(value);
1120 }
1121 }
1122
1123 __ B(GetExitLabel());
1124 }
1125
1126 private:
1127 // The register containing the object holding the marked object reference field.
1128 const Register obj_;
1129 // The offset, index and scale factor to access the reference in `obj_`.
1130 uint32_t offset_;
1131 Location index_;
1132 size_t scale_factor_;
1133 // Is a null check required?
1134 bool needs_null_check_;
1135 // Should this reference load use Load-Acquire semantics?
1136 bool use_load_acquire_;
1137 // A temporary register used to hold the lock word of `obj_`; and
1138 // also to hold the original reference value, when the reference is
1139 // marked.
1140 const Register temp_;
1141
1142 DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64);
1143 };
1144
1145 // Slow path generating a read barrier for a heap reference.
1146 class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
1147 public:
1148 ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
1149 Location out,
1150 Location ref,
1151 Location obj,
1152 uint32_t offset,
1153 Location index)
1154 : SlowPathCodeARM64(instruction),
1155 out_(out),
1156 ref_(ref),
1157 obj_(obj),
1158 offset_(offset),
1159 index_(index) {
1160 DCHECK(kEmitCompilerReadBarrier);
1161 // If `obj` is equal to `out` or `ref`, it means the initial object
1162 // has been overwritten by (or after) the heap object reference load
1163 // to be instrumented, e.g.:
1164 //
1165 // __ Ldr(out, HeapOperand(out, class_offset);
1166 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
1167 //
1168 // In that case, we have lost the information about the original
1169 // object, and the emitted read barrier cannot work properly.
1170 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
1171 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
1172 }
1173
1174 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
1175 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1176 LocationSummary* locations = instruction_->GetLocations();
1177 Primitive::Type type = Primitive::kPrimNot;
1178 DCHECK(locations->CanCall());
1179 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
1180 DCHECK(instruction_->IsInstanceFieldGet() ||
1181 instruction_->IsStaticFieldGet() ||
1182 instruction_->IsArrayGet() ||
1183 instruction_->IsInstanceOf() ||
1184 instruction_->IsCheckCast() ||
1185 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
1186 << "Unexpected instruction in read barrier for heap reference slow path: "
1187 << instruction_->DebugName();
1188 // The read barrier instrumentation of object ArrayGet
1189 // instructions does not support the HIntermediateAddress
1190 // instruction.
1191 DCHECK(!(instruction_->IsArrayGet() &&
1192 instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
1193
1194 __ Bind(GetEntryLabel());
1195
1196 SaveLiveRegisters(codegen, locations);
1197
1198 // We may have to change the index's value, but as `index_` is a
1199 // constant member (like other "inputs" of this slow path),
1200 // introduce a copy of it, `index`.
1201 Location index = index_;
1202 if (index_.IsValid()) {
1203 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
1204 if (instruction_->IsArrayGet()) {
1205 // Compute the actual memory offset and store it in `index`.
1206 Register index_reg = RegisterFrom(index_, Primitive::kPrimInt);
1207 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
1208 if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
1209 // We are about to change the value of `index_reg` (see the
1210 // calls to vixl::MacroAssembler::Lsl and
1211 // vixl::MacroAssembler::Mov below), but it has
1212 // not been saved by the previous call to
1213 // art::SlowPathCode::SaveLiveRegisters, as it is a
1214 // callee-save register --
1215 // art::SlowPathCode::SaveLiveRegisters does not consider
1216 // callee-save registers, as it has been designed with the
1217 // assumption that callee-save registers are supposed to be
1218 // handled by the called function. So, as a callee-save
1219 // register, `index_reg` _would_ eventually be saved onto
1220 // the stack, but it would be too late: we would have
1221 // changed its value earlier. Therefore, we manually save
1222 // it here into another freely available register,
1223 // `free_reg`, chosen of course among the caller-save
1224 // registers (as a callee-save `free_reg` register would
1225 // exhibit the same problem).
1226 //
1227 // Note we could have requested a temporary register from
1228 // the register allocator instead; but we prefer not to, as
1229 // this is a slow path, and we know we can find a
1230 // caller-save register that is available.
1231 Register free_reg = FindAvailableCallerSaveRegister(codegen);
1232 __ Mov(free_reg.W(), index_reg);
1233 index_reg = free_reg;
1234 index = LocationFrom(index_reg);
1235 } else {
1236 // The initial register stored in `index_` has already been
1237 // saved in the call to art::SlowPathCode::SaveLiveRegisters
1238 // (as it is not a callee-save register), so we can freely
1239 // use it.
1240 }
1241 // Shifting the index value contained in `index_reg` by the scale
1242 // factor (2) cannot overflow in practice, as the runtime is
1243 // unable to allocate object arrays with a size larger than
1244 // 2^26 - 1 (that is, 2^28 - 4 bytes).
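// (2^26 - 1 references of 4 bytes each span at most 2^28 - 4 bytes, so the shifted index plus
// the 32-bit data offset still fits in a W register.)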
1245 __ Lsl(index_reg, index_reg, Primitive::ComponentSizeShift(type));
1246 static_assert(
1247 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
1248 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
1249 __ Add(index_reg, index_reg, Operand(offset_));
1250 } else {
1251 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
1252 // intrinsics, `index_` is not shifted by a scale factor of 2
1253 // (as in the case of ArrayGet), as it is actually an offset
1254 // to an object field within an object.
1255 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
1256 DCHECK(instruction_->GetLocations()->Intrinsified());
1257 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
1258 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
1259 << instruction_->AsInvoke()->GetIntrinsic();
1260 DCHECK_EQ(offset_, 0u);
1261 DCHECK(index_.IsRegister());
1262 }
1263 }
1264
1265 // We're moving two or three locations to locations that could
1266 // overlap, so we need a parallel move resolver.
1267 InvokeRuntimeCallingConvention calling_convention;
1268 HParallelMove parallel_move(codegen->GetGraph()->GetArena());
1269 parallel_move.AddMove(ref_,
1270 LocationFrom(calling_convention.GetRegisterAt(0)),
1271 type,
1272 nullptr);
1273 parallel_move.AddMove(obj_,
1274 LocationFrom(calling_convention.GetRegisterAt(1)),
1275 type,
1276 nullptr);
1277 if (index.IsValid()) {
1278 parallel_move.AddMove(index,
1279 LocationFrom(calling_convention.GetRegisterAt(2)),
1280 Primitive::kPrimInt,
1281 nullptr);
1282 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
1283 } else {
1284 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
1285 arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
1286 }
1287 arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
1288 instruction_,
1289 instruction_->GetDexPc(),
1290 this);
1291 CheckEntrypointTypes<
1292 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
1293 arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
1294
1295 RestoreLiveRegisters(codegen, locations);
1296
1297 __ B(GetExitLabel());
1298 }
1299
1300 const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
1301
1302 private:
1303 Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
1304 size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
1305 size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
1306 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
1307 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
1308 return Register(VIXLRegCodeFromART(i), kXRegSize);
1309 }
1310 }
1311 // We shall never fail to find a free caller-save register, as
1312 // there are more than two core caller-save registers on ARM64
1313 // (meaning it is possible to find one which is different from
1314 // `ref` and `obj`).
1315 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
1316 LOG(FATAL) << "Could not find a free register";
1317 UNREACHABLE();
1318 }
1319
1320 const Location out_;
1321 const Location ref_;
1322 const Location obj_;
1323 const uint32_t offset_;
1324 // An additional location containing an index to an array.
1325 // Only used for HArrayGet and the UnsafeGetObject &
1326 // UnsafeGetObjectVolatile intrinsics.
1327 const Location index_;
1328
1329 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
1330 };
1331
1332 // Slow path generating a read barrier for a GC root.
1333 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
1334 public:
1335 ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
1336 : SlowPathCodeARM64(instruction), out_(out), root_(root) {
1337 DCHECK(kEmitCompilerReadBarrier);
1338 }
1339
1340 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
1341 LocationSummary* locations = instruction_->GetLocations();
1342 Primitive::Type type = Primitive::kPrimNot;
1343 DCHECK(locations->CanCall());
1344 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
1345 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
1346 << "Unexpected instruction in read barrier for GC root slow path: "
1347 << instruction_->DebugName();
1348
1349 __ Bind(GetEntryLabel());
1350 SaveLiveRegisters(codegen, locations);
1351
1352 InvokeRuntimeCallingConvention calling_convention;
1353 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1354 // The argument of the ReadBarrierForRootSlow is not a managed
1355 // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
1356 // thus we need a 64-bit move here, and we cannot use
1357 //
1358 // arm64_codegen->MoveLocation(
1359 // LocationFrom(calling_convention.GetRegisterAt(0)),
1360 // root_,
1361 // type);
1362 //
1363 // which would emit a 32-bit move, as `type` is a (32-bit wide)
1364 // reference type (`Primitive::kPrimNot`).
1365 __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
1366 arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
1367 instruction_,
1368 instruction_->GetDexPc(),
1369 this);
1370 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
1371 arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
1372
1373 RestoreLiveRegisters(codegen, locations);
1374 __ B(GetExitLabel());
1375 }
1376
1377 const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; }
1378
1379 private:
1380 const Location out_;
1381 const Location root_;
1382
1383 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
1384 };
1385
1386 #undef __
1387
1388 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
1389 Location next_location;
1390 if (type == Primitive::kPrimVoid) {
1391 LOG(FATAL) << "Unreachable type " << type;
1392 }
1393
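// Illustrative note (assuming the usual ARM64 managed calling convention,
// where x0/w0 carries the ArtMethod* and arguments use x1-x7/w1-w7 and
// d0-d7/s0-s7): for a signature (int, double, long) this visitor hands out
// w1, d0 and x2 in turn; once registers of a kind run out, further arguments
// of that kind fall through to stack slots, while `stack_index_` still
// advances for every argument so each one has stack space reserved.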
1394 if (Primitive::IsFloatingPointType(type) &&
1395 (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
1396 next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
1397 } else if (!Primitive::IsFloatingPointType(type) &&
1398 (gp_index_ < calling_convention.GetNumberOfRegisters())) {
1399 next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
1400 } else {
1401 size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
1402 next_location = Primitive::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
1403 : Location::StackSlot(stack_offset);
1404 }
1405
1406 // Space on the stack is reserved for all arguments.
1407 stack_index_ += Primitive::Is64BitType(type) ? 2 : 1;
1408 return next_location;
1409 }
1410
1411 Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
1412 return LocationFrom(kArtMethodRegister);
1413 }
1414
1415 CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
1416 const Arm64InstructionSetFeatures& isa_features,
1417 const CompilerOptions& compiler_options,
1418 OptimizingCompilerStats* stats)
1419 : CodeGenerator(graph,
1420 kNumberOfAllocatableRegisters,
1421 kNumberOfAllocatableFPRegisters,
1422 kNumberOfAllocatableRegisterPairs,
1423 callee_saved_core_registers.GetList(),
1424 callee_saved_fp_registers.GetList(),
1425 compiler_options,
1426 stats),
1427 block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1428 jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1429 location_builder_(graph, this),
1430 instruction_visitor_(graph, this),
1431 move_resolver_(graph->GetArena(), this),
1432 assembler_(graph->GetArena()),
1433 isa_features_(isa_features),
1434 uint32_literals_(std::less<uint32_t>(),
1435 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1436 uint64_literals_(std::less<uint64_t>(),
1437 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1438 pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1439 boot_image_string_patches_(StringReferenceValueComparator(),
1440 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1441 pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1442 boot_image_type_patches_(TypeReferenceValueComparator(),
1443 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1444 pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1445 type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1446 baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1447 jit_string_patches_(StringReferenceValueComparator(),
1448 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1449 jit_class_patches_(TypeReferenceValueComparator(),
1450 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
1451 // Save the link register (containing the return address) to mimic Quick.
1452 AddAllocatedRegister(LocationFrom(lr));
1453 }
1454
1455 #define __ GetVIXLAssembler()->
1456
1457 void CodeGeneratorARM64::EmitJumpTables() {
1458 for (auto&& jump_table : jump_tables_) {
1459 jump_table->EmitTable(this);
1460 }
1461 }
1462
1463 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
1464 EmitJumpTables();
1465 // Ensure we emit the literal pool.
1466 __ FinalizeCode();
1467
1468 CodeGenerator::Finalize(allocator);
1469 }
1470
1471 void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
1472 // Note: There are 6 kinds of moves:
1473 // 1. constant -> GPR/FPR (non-cycle)
1474 // 2. constant -> stack (non-cycle)
1475 // 3. GPR/FPR -> GPR/FPR
1476 // 4. GPR/FPR -> stack
1477 // 5. stack -> GPR/FPR
1478 // 6. stack -> stack (non-cycle)
1479 // Cases 1, 2 and 6 should never be part of a dependency cycle on ARM64. For cases 3, 4 and 5,
1480 // VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no intersecting
1481 // cycles on ARM64, so we always have at least 1 GPR and 1 FPR VIXL temp available to resolve
1482 // the dependency.
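// For example, a swap cycle between two core registers is broken with the
// single scratch X register acquired in AllocateScratchLocationFor(), while
// a cycle involving FP registers or SIMD stack slots uses the scratch D/Q
// register; a core cycle and an FP cycle never need the same temp.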
1483 vixl_temps_.Open(GetVIXLAssembler());
1484 }
1485
1486 void ParallelMoveResolverARM64::FinishEmitNativeCode() {
1487 vixl_temps_.Close();
1488 }
1489
1490 Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
1491 DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister
1492 || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot
1493 || kind == Location::kSIMDStackSlot);
1494 kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot)
1495 ? Location::kFpuRegister
1496 : Location::kRegister;
1497 Location scratch = GetScratchLocation(kind);
1498 if (!scratch.Equals(Location::NoLocation())) {
1499 return scratch;
1500 }
1501 // Allocate from VIXL temp registers.
1502 if (kind == Location::kRegister) {
1503 scratch = LocationFrom(vixl_temps_.AcquireX());
1504 } else {
1505 DCHECK(kind == Location::kFpuRegister);
1506 scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
1507 ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
1508 : vixl_temps_.AcquireD());
1509 }
1510 AddScratchLocation(scratch);
1511 return scratch;
1512 }
1513
1514 void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
1515 if (loc.IsRegister()) {
1516 vixl_temps_.Release(XRegisterFrom(loc));
1517 } else {
1518 DCHECK(loc.IsFpuRegister());
1519 vixl_temps_.Release(codegen_->GetGraph()->HasSIMD() ? QRegisterFrom(loc) : DRegisterFrom(loc));
1520 }
1521 RemoveScratchLocation(loc);
1522 }
1523
1524 void ParallelMoveResolverARM64::EmitMove(size_t index) {
1525 MoveOperands* move = moves_[index];
1526 codegen_->MoveLocation(move->GetDestination(), move->GetSource(), Primitive::kPrimVoid);
1527 }
1528
1529 void CodeGeneratorARM64::GenerateFrameEntry() {
1530 MacroAssembler* masm = GetVIXLAssembler();
1531 __ Bind(&frame_entry_label_);
1532
1533 bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kArm64) || !IsLeafMethod();
1534 if (do_overflow_check) {
1535 UseScratchRegisterScope temps(masm);
1536 Register temp = temps.AcquireX();
1537 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1538 __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
1539 {
1540 // Ensure that between load and RecordPcInfo there are no pools emitted.
1541 ExactAssemblyScope eas(GetVIXLAssembler(),
1542 kInstructionSize,
1543 CodeBufferCheckScope::kExactSize);
1544 __ ldr(wzr, MemOperand(temp, 0));
1545 RecordPcInfo(nullptr, 0);
1546 }
1547 }
1548
1549 if (!HasEmptyFrame()) {
1550 int frame_size = GetFrameSize();
1551 // Stack layout:
1552 // sp[frame_size - 8] : lr.
1553 // ... : other preserved core registers.
1554 // ... : other preserved fp registers.
1555 // ... : reserved frame space.
1556 // sp[0] : current method.
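// (Hypothetical example, for illustration only: with frame_size = 96, core
// spills {x20, x21, lr} and no FP spills, the pre-indexed store below puts
// the ArtMethod* at sp[0], and SpillRegisters() places x20, x21 and lr at
// sp[72], sp[80] and sp[88] respectively, i.e. lr at sp[frame_size - 8].)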
1557
1558 // Save the current method if we need it. Note that we do not
1559 // do this in HCurrentMethod, as the instruction might have been removed
1560 // in the SSA graph.
1561 if (RequiresCurrentMethod()) {
1562 __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
1563 } else {
1564 __ Claim(frame_size);
1565 }
1566 GetAssembler()->cfi().AdjustCFAOffset(frame_size);
1567 GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(),
1568 frame_size - GetCoreSpillSize());
1569 GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(),
1570 frame_size - FrameEntrySpillSize());
1571
1572 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1573 // Initialize should_deoptimize flag to 0.
1574 Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
1575 __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
1576 }
1577 }
1578 }
1579
1580 void CodeGeneratorARM64::GenerateFrameExit() {
1581 GetAssembler()->cfi().RememberState();
1582 if (!HasEmptyFrame()) {
1583 int frame_size = GetFrameSize();
1584 GetAssembler()->UnspillRegisters(GetFramePreservedFPRegisters(),
1585 frame_size - FrameEntrySpillSize());
1586 GetAssembler()->UnspillRegisters(GetFramePreservedCoreRegisters(),
1587 frame_size - GetCoreSpillSize());
1588 __ Drop(frame_size);
1589 GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
1590 }
1591 __ Ret();
1592 GetAssembler()->cfi().RestoreState();
1593 GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
1594 }
1595
1596 CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
1597 DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
1598 return CPURegList(CPURegister::kRegister, kXRegSize,
1599 core_spill_mask_);
1600 }
1601
1602 CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
1603 DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
1604 GetNumberOfFloatingPointRegisters()));
1605 return CPURegList(CPURegister::kFPRegister, kDRegSize,
1606 fpu_spill_mask_);
1607 }
1608
1609 void CodeGeneratorARM64::Bind(HBasicBlock* block) {
1610 __ Bind(GetLabelOf(block));
1611 }
1612
1613 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
1614 DCHECK(location.IsRegister());
1615 __ Mov(RegisterFrom(location, Primitive::kPrimInt), value);
1616 }
1617
1618 void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1619 if (location.IsRegister()) {
1620 locations->AddTemp(location);
1621 } else {
1622 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1623 }
1624 }
1625
1626 void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
1627 UseScratchRegisterScope temps(GetVIXLAssembler());
1628 Register card = temps.AcquireX();
1629 Register temp = temps.AcquireW(); // Index within the CardTable - 32bit.
1630 vixl::aarch64::Label done;
1631 if (value_can_be_null) {
1632 __ Cbz(value, &done);
1633 }
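// Dirty the card covering `object`: load the card table base from the
// Thread, index it with (object >> kCardShift) and store a single byte
// there. The byte written is the low 8 bits of the card table base, which
// the runtime biases so that it doubles as the "dirty" value (see
// gc::accounting::CardTable), so no extra constant needs to be materialized.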
1634 __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
1635 __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
1636 __ Strb(card, MemOperand(card, temp.X()));
1637 if (value_can_be_null) {
1638 __ Bind(&done);
1639 }
1640 }
1641
1642 void CodeGeneratorARM64::SetupBlockedRegisters() const {
1643 // Blocked core registers:
1644 // lr : Runtime reserved.
1645 // tr : Runtime reserved.
1646 // xSuspend : Runtime reserved. TODO: Unblock this when the runtime stops using it.
1647 // ip1 : VIXL core temp.
1648 // ip0 : VIXL core temp.
1649 //
1650 // Blocked fp registers:
1651 // d31 : VIXL fp temp.
1652 CPURegList reserved_core_registers = vixl_reserved_core_registers;
1653 reserved_core_registers.Combine(runtime_reserved_core_registers);
1654 while (!reserved_core_registers.IsEmpty()) {
1655 blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
1656 }
1657
1658 CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
1659 while (!reserved_fp_registers.IsEmpty()) {
1660 blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
1661 }
1662
1663 if (GetGraph()->IsDebuggable()) {
1664 // Stubs do not save callee-save floating point registers. If the graph
1665 // is debuggable, we need to deal with these registers differently. For
1666 // now, just block them.
1667 CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
1668 while (!reserved_fp_registers_debuggable.IsEmpty()) {
1669 blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
1670 }
1671 }
1672 }
1673
1674 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1675 Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1676 __ Str(reg, MemOperand(sp, stack_index));
1677 return kArm64WordSize;
1678 }
1679
1680 size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1681 Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1682 __ Ldr(reg, MemOperand(sp, stack_index));
1683 return kArm64WordSize;
1684 }
1685
1686 size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1687 FPRegister reg = FPRegister(reg_id, kDRegSize);
1688 __ Str(reg, MemOperand(sp, stack_index));
1689 return kArm64WordSize;
1690 }
1691
1692 size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1693 FPRegister reg = FPRegister(reg_id, kDRegSize);
1694 __ Ldr(reg, MemOperand(sp, stack_index));
1695 return kArm64WordSize;
1696 }
1697
1698 void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
1699 stream << XRegister(reg);
1700 }
1701
1702 void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1703 stream << DRegister(reg);
1704 }
1705
1706 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
1707 if (constant->IsIntConstant()) {
1708 __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
1709 } else if (constant->IsLongConstant()) {
1710 __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
1711 } else if (constant->IsNullConstant()) {
1712 __ Mov(Register(destination), 0);
1713 } else if (constant->IsFloatConstant()) {
1714 __ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue());
1715 } else {
1716 DCHECK(constant->IsDoubleConstant());
1717 __ Fmov(FPRegister(destination), constant->AsDoubleConstant()->GetValue());
1718 }
1719 }
1720
1721
1722 static bool CoherentConstantAndType(Location constant, Primitive::Type type) {
1723 DCHECK(constant.IsConstant());
1724 HConstant* cst = constant.GetConstant();
1725 return (cst->IsIntConstant() && type == Primitive::kPrimInt) ||
1726 // Null is mapped to a core W register, which we associate with kPrimInt.
1727 (cst->IsNullConstant() && type == Primitive::kPrimInt) ||
1728 (cst->IsLongConstant() && type == Primitive::kPrimLong) ||
1729 (cst->IsFloatConstant() && type == Primitive::kPrimFloat) ||
1730 (cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
1731 }
1732
1733 // Allocate a scratch register from the VIXL pool, querying first into
1734 // the floating-point register pool, and then the core register
1735 // pool. This is essentially a reimplementation of
1736 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
1737 // using a different allocation strategy.
1738 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
1739 vixl::aarch64::UseScratchRegisterScope* temps,
1740 int size_in_bits) {
1741 return masm->GetScratchFPRegisterList()->IsEmpty()
1742 ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
1743 : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
1744 }
1745
1746 void CodeGeneratorARM64::MoveLocation(Location destination,
1747 Location source,
1748 Primitive::Type dst_type) {
1749 if (source.Equals(destination)) {
1750 return;
1751 }
1752
1753 // A valid move can always be inferred from the destination and source
1754 // locations. When moving from and to a register, the argument type can be
1755 // used to generate 32bit instead of 64bit moves. In debug mode we also
1756 // check the coherency of the locations and the type.
1757 bool unspecified_type = (dst_type == Primitive::kPrimVoid);
1758
1759 if (destination.IsRegister() || destination.IsFpuRegister()) {
1760 if (unspecified_type) {
1761 HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
1762 if (source.IsStackSlot() ||
1763 (src_cst != nullptr && (src_cst->IsIntConstant()
1764 || src_cst->IsFloatConstant()
1765 || src_cst->IsNullConstant()))) {
1766 // For stack slots and 32bit constants, a 32bit type is appropriate.
1767 dst_type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat;
1768 } else {
1769 // If the source is a double stack slot or a 64bit constant, a 64bit
1770 // type is appropriate. Else the source is a register, and since the
1771 // type has not been specified, we chose a 64bit type to force a 64bit
1772 // move.
1773 dst_type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble;
1774 }
1775 }
1776 DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(dst_type)) ||
1777 (destination.IsRegister() && !Primitive::IsFloatingPointType(dst_type)));
1778 CPURegister dst = CPURegisterFrom(destination, dst_type);
1779 if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
1780 DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
1781 __ Ldr(dst, StackOperandFrom(source));
1782 } else if (source.IsSIMDStackSlot()) {
1783 __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
1784 } else if (source.IsConstant()) {
1785 DCHECK(CoherentConstantAndType(source, dst_type));
1786 MoveConstant(dst, source.GetConstant());
1787 } else if (source.IsRegister()) {
1788 if (destination.IsRegister()) {
1789 __ Mov(Register(dst), RegisterFrom(source, dst_type));
1790 } else {
1791 DCHECK(destination.IsFpuRegister());
1792 Primitive::Type source_type = Primitive::Is64BitType(dst_type)
1793 ? Primitive::kPrimLong
1794 : Primitive::kPrimInt;
1795 __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
1796 }
1797 } else {
1798 DCHECK(source.IsFpuRegister());
1799 if (destination.IsRegister()) {
1800 Primitive::Type source_type = Primitive::Is64BitType(dst_type)
1801 ? Primitive::kPrimDouble
1802 : Primitive::kPrimFloat;
1803 __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
1804 } else {
1805 DCHECK(destination.IsFpuRegister());
1806 if (GetGraph()->HasSIMD()) {
1807 __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
1808 } else {
1809 __ Fmov(FPRegister(dst), FPRegisterFrom(source, dst_type));
1810 }
1811 }
1812 }
1813 } else if (destination.IsSIMDStackSlot()) {
1814 if (source.IsFpuRegister()) {
1815 __ Str(QRegisterFrom(source), StackOperandFrom(destination));
1816 } else {
1817 DCHECK(source.IsSIMDStackSlot());
1818 UseScratchRegisterScope temps(GetVIXLAssembler());
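// Copy the 128-bit slot through whichever scratch is free: two 64-bit
// core loads/stores when no FP scratch register is available, otherwise a
// single Q-register load/store.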
1819 if (GetVIXLAssembler()->GetScratchFPRegisterList()->IsEmpty()) {
1820 Register temp = temps.AcquireX();
1821 __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
1822 __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
1823 __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
1824 __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
1825 } else {
1826 FPRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
1827 __ Ldr(temp, StackOperandFrom(source));
1828 __ Str(temp, StackOperandFrom(destination));
1829 }
1830 }
1831 } else { // The destination is not a register. It must be a stack slot.
1832 DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
1833 if (source.IsRegister() || source.IsFpuRegister()) {
1834 if (unspecified_type) {
1835 if (source.IsRegister()) {
1836 dst_type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong;
1837 } else {
1838 dst_type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble;
1839 }
1840 }
1841 DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(dst_type)) &&
1842 (source.IsFpuRegister() == Primitive::IsFloatingPointType(dst_type)));
1843 __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination));
1844 } else if (source.IsConstant()) {
1845 DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type))
1846 << source << " " << dst_type;
1847 UseScratchRegisterScope temps(GetVIXLAssembler());
1848 HConstant* src_cst = source.GetConstant();
1849 CPURegister temp;
1850 if (src_cst->IsZeroBitPattern()) {
1851 temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant())
1852 ? Register(xzr)
1853 : Register(wzr);
1854 } else {
1855 if (src_cst->IsIntConstant()) {
1856 temp = temps.AcquireW();
1857 } else if (src_cst->IsLongConstant()) {
1858 temp = temps.AcquireX();
1859 } else if (src_cst->IsFloatConstant()) {
1860 temp = temps.AcquireS();
1861 } else {
1862 DCHECK(src_cst->IsDoubleConstant());
1863 temp = temps.AcquireD();
1864 }
1865 MoveConstant(temp, src_cst);
1866 }
1867 __ Str(temp, StackOperandFrom(destination));
1868 } else {
1869 DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
1870 DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
1871 UseScratchRegisterScope temps(GetVIXLAssembler());
1872 // Use any scratch register (a core or a floating-point one)
1873 // from VIXL scratch register pools as a temporary.
1874 //
1875 // We used to only use the FP scratch register pool, but in some
1876 // rare cases the only register from this pool (D31) would
1877 // already be used (e.g. within a ParallelMove instruction, when
1878 // a move is blocked by another move requiring a scratch FP
1879 // register, which would reserve D31). To prevent this issue, we
1880 // ask for a scratch register of any type (core or FP).
1881 //
1882 // Also, we start by asking for a FP scratch register first, as the
1883 // demand of scratch core registers is higher. This is why we
1884 // use AcquireFPOrCoreCPURegisterOfSize instead of
1885 // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
1886 // allocates core scratch registers first.
1887 CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
1888 GetVIXLAssembler(),
1889 &temps,
1890 (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
1891 __ Ldr(temp, StackOperandFrom(source));
1892 __ Str(temp, StackOperandFrom(destination));
1893 }
1894 }
1895 }
1896
1897 void CodeGeneratorARM64::Load(Primitive::Type type,
1898 CPURegister dst,
1899 const MemOperand& src) {
1900 switch (type) {
1901 case Primitive::kPrimBoolean:
1902 __ Ldrb(Register(dst), src);
1903 break;
1904 case Primitive::kPrimByte:
1905 __ Ldrsb(Register(dst), src);
1906 break;
1907 case Primitive::kPrimShort:
1908 __ Ldrsh(Register(dst), src);
1909 break;
1910 case Primitive::kPrimChar:
1911 __ Ldrh(Register(dst), src);
1912 break;
1913 case Primitive::kPrimInt:
1914 case Primitive::kPrimNot:
1915 case Primitive::kPrimLong:
1916 case Primitive::kPrimFloat:
1917 case Primitive::kPrimDouble:
1918 DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
1919 __ Ldr(dst, src);
1920 break;
1921 case Primitive::kPrimVoid:
1922 LOG(FATAL) << "Unreachable type " << type;
1923 }
1924 }
1925
1926 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
1927 CPURegister dst,
1928 const MemOperand& src,
1929 bool needs_null_check) {
1930 MacroAssembler* masm = GetVIXLAssembler();
1931 UseScratchRegisterScope temps(masm);
1932 Register temp_base = temps.AcquireX();
1933 Primitive::Type type = instruction->GetType();
1934
1935 DCHECK(!src.IsPreIndex());
1936 DCHECK(!src.IsPostIndex());
1937
1938 // TODO(vixl): Let the MacroAssembler handle MemOperand.
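// The acquiring loads below (ldar/ldarb/ldarh) only accept a plain [base]
// MemOperand, so any offset is folded into `temp_base` first.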
1939 __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
1940 {
1941 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
1942 MemOperand base = MemOperand(temp_base);
1943 switch (type) {
1944 case Primitive::kPrimBoolean:
1945 {
1946 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1947 __ ldarb(Register(dst), base);
1948 if (needs_null_check) {
1949 MaybeRecordImplicitNullCheck(instruction);
1950 }
1951 }
1952 break;
1953 case Primitive::kPrimByte:
1954 {
1955 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1956 __ ldarb(Register(dst), base);
1957 if (needs_null_check) {
1958 MaybeRecordImplicitNullCheck(instruction);
1959 }
1960 }
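// ldarb zero-extends the loaded byte; kPrimByte is signed, so sign-extend
// it explicitly.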
1961 __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
1962 break;
1963 case Primitive::kPrimChar:
1964 {
1965 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1966 __ ldarh(Register(dst), base);
1967 if (needs_null_check) {
1968 MaybeRecordImplicitNullCheck(instruction);
1969 }
1970 }
1971 break;
1972 case Primitive::kPrimShort:
1973 {
1974 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1975 __ ldarh(Register(dst), base);
1976 if (needs_null_check) {
1977 MaybeRecordImplicitNullCheck(instruction);
1978 }
1979 }
1980 __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
1981 break;
1982 case Primitive::kPrimInt:
1983 case Primitive::kPrimNot:
1984 case Primitive::kPrimLong:
1985 DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
1986 {
1987 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1988 __ ldar(Register(dst), base);
1989 if (needs_null_check) {
1990 MaybeRecordImplicitNullCheck(instruction);
1991 }
1992 }
1993 break;
1994 case Primitive::kPrimFloat:
1995 case Primitive::kPrimDouble: {
1996 DCHECK(dst.IsFPRegister());
1997 DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
1998
1999 Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
2000 {
2001 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2002 __ ldar(temp, base);
2003 if (needs_null_check) {
2004 MaybeRecordImplicitNullCheck(instruction);
2005 }
2006 }
2007 __ Fmov(FPRegister(dst), temp);
2008 break;
2009 }
2010 case Primitive::kPrimVoid:
2011 LOG(FATAL) << "Unreachable type " << type;
2012 }
2013 }
2014 }
2015
2016 void CodeGeneratorARM64::Store(Primitive::Type type,
2017 CPURegister src,
2018 const MemOperand& dst) {
2019 switch (type) {
2020 case Primitive::kPrimBoolean:
2021 case Primitive::kPrimByte:
2022 __ Strb(Register(src), dst);
2023 break;
2024 case Primitive::kPrimChar:
2025 case Primitive::kPrimShort:
2026 __ Strh(Register(src), dst);
2027 break;
2028 case Primitive::kPrimInt:
2029 case Primitive::kPrimNot:
2030 case Primitive::kPrimLong:
2031 case Primitive::kPrimFloat:
2032 case Primitive::kPrimDouble:
2033 DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));
2034 __ Str(src, dst);
2035 break;
2036 case Primitive::kPrimVoid:
2037 LOG(FATAL) << "Unreachable type " << type;
2038 }
2039 }
2040
2041 void CodeGeneratorARM64::StoreRelease(HInstruction* instruction,
2042 Primitive::Type type,
2043 CPURegister src,
2044 const MemOperand& dst,
2045 bool needs_null_check) {
2046 MacroAssembler* masm = GetVIXLAssembler();
2047 UseScratchRegisterScope temps(GetVIXLAssembler());
2048 Register temp_base = temps.AcquireX();
2049
2050 DCHECK(!dst.IsPreIndex());
2051 DCHECK(!dst.IsPostIndex());
2052
2053 // TODO(vixl): Let the MacroAssembler handle this.
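// The releasing stores below (stlr/stlrb/stlrh) only accept a plain [base]
// MemOperand, so the addressing expression is folded into `temp_base` first.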
2054 Operand op = OperandFromMemOperand(dst);
2055 __ Add(temp_base, dst.GetBaseRegister(), op);
2056 MemOperand base = MemOperand(temp_base);
2057 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2058 switch (type) {
2059 case Primitive::kPrimBoolean:
2060 case Primitive::kPrimByte:
2061 {
2062 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2063 __ stlrb(Register(src), base);
2064 if (needs_null_check) {
2065 MaybeRecordImplicitNullCheck(instruction);
2066 }
2067 }
2068 break;
2069 case Primitive::kPrimChar:
2070 case Primitive::kPrimShort:
2071 {
2072 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2073 __ stlrh(Register(src), base);
2074 if (needs_null_check) {
2075 MaybeRecordImplicitNullCheck(instruction);
2076 }
2077 }
2078 break;
2079 case Primitive::kPrimInt:
2080 case Primitive::kPrimNot:
2081 case Primitive::kPrimLong:
2082 DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));
2083 {
2084 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2085 __ stlr(Register(src), base);
2086 if (needs_null_check) {
2087 MaybeRecordImplicitNullCheck(instruction);
2088 }
2089 }
2090 break;
2091 case Primitive::kPrimFloat:
2092 case Primitive::kPrimDouble: {
2093 DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));
2094 Register temp_src;
2095 if (src.IsZero()) {
2096 // The zero register is used to avoid synthesizing zero constants.
2097 temp_src = Register(src);
2098 } else {
2099 DCHECK(src.IsFPRegister());
2100 temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
2101 __ Fmov(temp_src, FPRegister(src));
2102 }
2103 {
2104 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2105 __ stlr(temp_src, base);
2106 if (needs_null_check) {
2107 MaybeRecordImplicitNullCheck(instruction);
2108 }
2109 }
2110 break;
2111 }
2112 case Primitive::kPrimVoid:
2113 LOG(FATAL) << "Unreachable type " << type;
2114 }
2115 }
2116
2117 void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
2118 HInstruction* instruction,
2119 uint32_t dex_pc,
2120 SlowPathCode* slow_path) {
2121 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
2122
2123 __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value()));
2124 {
2125 // Ensure the pc position is recorded immediately after the `blr` instruction.
2126 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
2127 __ blr(lr);
2128 if (EntrypointRequiresStackMap(entrypoint)) {
2129 RecordPcInfo(instruction, dex_pc, slow_path);
2130 }
2131 }
2132 }
2133
2134 void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
2135 HInstruction* instruction,
2136 SlowPathCode* slow_path) {
2137 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
2138 __ Ldr(lr, MemOperand(tr, entry_point_offset));
2139 __ Blr(lr);
2140 }
2141
2142 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
2143 Register class_reg) {
2144 UseScratchRegisterScope temps(GetVIXLAssembler());
2145 Register temp = temps.AcquireW();
2146 size_t status_offset = mirror::Class::StatusOffset().SizeValue();
2147
2148 // Even if the initialized flag is set, we need to ensure consistent memory ordering.
2149 // TODO(vixl): Let the MacroAssembler handle MemOperand.
2150 __ Add(temp, class_reg, status_offset);
2151 __ Ldar(temp, HeapOperand(temp));
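// Any status below kStatusInitialized (e.g. the class is still being
// initialized on another thread) takes the slow path, which defers to the
// runtime for the full initialization check.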
2152 __ Cmp(temp, mirror::Class::kStatusInitialized);
2153 __ B(lt, slow_path->GetEntryLabel());
2154 __ Bind(slow_path->GetExitLabel());
2155 }
2156
2157 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
2158 BarrierType type = BarrierAll;
2159
2160 switch (kind) {
2161 case MemBarrierKind::kAnyAny:
2162 case MemBarrierKind::kAnyStore: {
2163 type = BarrierAll;
2164 break;
2165 }
2166 case MemBarrierKind::kLoadAny: {
2167 type = BarrierReads;
2168 break;
2169 }
2170 case MemBarrierKind::kStoreStore: {
2171 type = BarrierWrites;
2172 break;
2173 }
2174 default:
2175 LOG(FATAL) << "Unexpected memory barrier " << kind;
2176 }
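// With the InnerShareable domain this corresponds to "dmb ish" for
// BarrierAll, "dmb ishld" for BarrierReads and "dmb ishst" for
// BarrierWrites.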
2177 __ Dmb(InnerShareable, type);
2178 }
2179
2180 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
2181 HBasicBlock* successor) {
2182 SuspendCheckSlowPathARM64* slow_path =
2183 down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
2184 if (slow_path == nullptr) {
2185 slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor);
2186 instruction->SetSlowPath(slow_path);
2187 codegen_->AddSlowPath(slow_path);
2188 if (successor != nullptr) {
2189 DCHECK(successor->IsLoopHeader());
2190 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
2191 }
2192 } else {
2193 DCHECK_EQ(slow_path->GetSuccessor(), successor);
2194 }
2195
2196 UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
2197 Register temp = temps.AcquireW();
2198
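// The 16-bit thread flags are zero when nothing is pending; any set flag
// (e.g. a suspend or checkpoint request) routes execution to the slow path.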
2199 __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
2200 if (successor == nullptr) {
2201 __ Cbnz(temp, slow_path->GetEntryLabel());
2202 __ Bind(slow_path->GetReturnLabel());
2203 } else {
2204 __ Cbz(temp, codegen_->GetLabelOf(successor));
2205 __ B(slow_path->GetEntryLabel());
2206 // slow_path will return to GetLabelOf(successor).
2207 }
2208 }
2209
2210 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
2211 CodeGeneratorARM64* codegen)
2212 : InstructionCodeGenerator(graph, codegen),
2213 assembler_(codegen->GetAssembler()),
2214 codegen_(codegen) {}
2215
2216 #define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M) \
2217 /* No unimplemented IR. */
2218
2219 #define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode
2220
2221 enum UnimplementedInstructionBreakCode {
2222 // Using a base helps identify when we hit such breakpoints.
2223 UnimplementedInstructionBreakCodeBaseCode = 0x900,
2224 #define ENUM_UNIMPLEMENTED_INSTRUCTION(name) UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name),
2225 FOR_EACH_UNIMPLEMENTED_INSTRUCTION(ENUM_UNIMPLEMENTED_INSTRUCTION)
2226 #undef ENUM_UNIMPLEMENTED_INSTRUCTION
2227 };
2228
2229 #define DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS(name) \
2230 void InstructionCodeGeneratorARM64::Visit##name(H##name* instr ATTRIBUTE_UNUSED) { \
2231 __ Brk(UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name)); \
2232 } \
2233 void LocationsBuilderARM64::Visit##name(H##name* instr) { \
2234 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); \
2235 locations->SetOut(Location::Any()); \
2236 }
2237 FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS)
2238 #undef DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS
2239
2240 #undef UNIMPLEMENTED_INSTRUCTION_BREAK_CODE
2241 #undef FOR_EACH_UNIMPLEMENTED_INSTRUCTION
2242
2243 void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
2244 DCHECK_EQ(instr->InputCount(), 2U);
2245 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
2246 Primitive::Type type = instr->GetResultType();
2247 switch (type) {
2248 case Primitive::kPrimInt:
2249 case Primitive::kPrimLong:
2250 locations->SetInAt(0, Location::RequiresRegister());
2251 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
2252 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2253 break;
2254
2255 case Primitive::kPrimFloat:
2256 case Primitive::kPrimDouble:
2257 locations->SetInAt(0, Location::RequiresFpuRegister());
2258 locations->SetInAt(1, Location::RequiresFpuRegister());
2259 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2260 break;
2261
2262 default:
2263 LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type;
2264 }
2265 }
2266
2267 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
2268 const FieldInfo& field_info) {
2269 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
2270
2271 bool object_field_get_with_read_barrier =
2272 kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
2273 LocationSummary* locations =
2274 new (GetGraph()->GetArena()) LocationSummary(instruction,
2275 object_field_get_with_read_barrier ?
2276 LocationSummary::kCallOnSlowPath :
2277 LocationSummary::kNoCall);
2278 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
2279 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
2280 // We need a temporary register for the read barrier marking slow
2281 // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier.
2282 if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
2283 !Runtime::Current()->UseJitCompilation() &&
2284 !field_info.IsVolatile()) {
2285 // If link-time thunks for the Baker read barrier are enabled, for AOT
2286 // non-volatile loads we need a temporary only if the offset is too big.
2287 if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
2288 locations->AddTemp(FixedTempLocation());
2289 }
2290 } else {
2291 locations->AddTemp(Location::RequiresRegister());
2292 }
2293 }
2294 locations->SetInAt(0, Location::RequiresRegister());
2295 if (Primitive::IsFloatingPointType(instruction->GetType())) {
2296 locations->SetOut(Location::RequiresFpuRegister());
2297 } else {
2298 // The output overlaps for an object field get when read barriers
2299 // are enabled: we do not want the load to overwrite the object's
2300 // location, as we need it to emit the read barrier.
2301 locations->SetOut(
2302 Location::RequiresRegister(),
2303 object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2304 }
2305 }
2306
2307 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
2308 const FieldInfo& field_info) {
2309 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
2310 LocationSummary* locations = instruction->GetLocations();
2311 Location base_loc = locations->InAt(0);
2312 Location out = locations->Out();
2313 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
2314 Primitive::Type field_type = field_info.GetFieldType();
2315 MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
2316
2317 if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2318 // Object FieldGet with Baker's read barrier case.
2319 // /* HeapReference<Object> */ out = *(base + offset)
2320 Register base = RegisterFrom(base_loc, Primitive::kPrimNot);
2321 Location maybe_temp =
2322 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2323 // Note that potential implicit null checks are handled in this
2324 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
2325 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2326 instruction,
2327 out,
2328 base,
2329 offset,
2330 maybe_temp,
2331 /* needs_null_check */ true,
2332 field_info.IsVolatile());
2333 } else {
2334 // General case.
2335 if (field_info.IsVolatile()) {
2336 // Note that a potential implicit null check is handled in this
2337 // CodeGeneratorARM64::LoadAcquire call.
2338 // NB: LoadAcquire will record the pc info if needed.
2339 codegen_->LoadAcquire(
2340 instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
2341 } else {
2342 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2343 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2344 codegen_->Load(field_type, OutputCPURegister(instruction), field);
2345 codegen_->MaybeRecordImplicitNullCheck(instruction);
2346 }
2347 if (field_type == Primitive::kPrimNot) {
2348 // If read barriers are enabled, emit read barriers other than
2349 // Baker's using a slow path (and also unpoison the loaded
2350 // reference, if heap poisoning is enabled).
2351 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
2352 }
2353 }
2354 }
2355
2356 void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
2357 LocationSummary* locations =
2358 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
2359 locations->SetInAt(0, Location::RequiresRegister());
2360 if (IsConstantZeroBitPattern(instruction->InputAt(1))) {
2361 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
2362 } else if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
2363 locations->SetInAt(1, Location::RequiresFpuRegister());
2364 } else {
2365 locations->SetInAt(1, Location::RequiresRegister());
2366 }
2367 }
2368
2369 void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
2370 const FieldInfo& field_info,
2371 bool value_can_be_null) {
2372 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
2373
2374 Register obj = InputRegisterAt(instruction, 0);
2375 CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
2376 CPURegister source = value;
2377 Offset offset = field_info.GetFieldOffset();
2378 Primitive::Type field_type = field_info.GetFieldType();
2379
2380 {
2381 // We use a block to end the scratch scope before the write barrier, thus
2382 // freeing the temporary registers so they can be used in `MarkGCCard`.
2383 UseScratchRegisterScope temps(GetVIXLAssembler());
2384
2385 if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
2386 DCHECK(value.IsW());
2387 Register temp = temps.AcquireW();
2388 __ Mov(temp, value.W());
2389 GetAssembler()->PoisonHeapReference(temp.W());
2390 source = temp;
2391 }
2392
2393 if (field_info.IsVolatile()) {
2394 codegen_->StoreRelease(
2395 instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check */ true);
2396 } else {
2397 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2398 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2399 codegen_->Store(field_type, source, HeapOperand(obj, offset));
2400 codegen_->MaybeRecordImplicitNullCheck(instruction);
2401 }
2402 }
2403
2404 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
2405 codegen_->MarkGCCard(obj, Register(value), value_can_be_null);
2406 }
2407 }
2408
2409 void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
2410 Primitive::Type type = instr->GetType();
2411
2412 switch (type) {
2413 case Primitive::kPrimInt:
2414 case Primitive::kPrimLong: {
2415 Register dst = OutputRegister(instr);
2416 Register lhs = InputRegisterAt(instr, 0);
2417 Operand rhs = InputOperandAt(instr, 1);
2418 if (instr->IsAdd()) {
2419 __ Add(dst, lhs, rhs);
2420 } else if (instr->IsAnd()) {
2421 __ And(dst, lhs, rhs);
2422 } else if (instr->IsOr()) {
2423 __ Orr(dst, lhs, rhs);
2424 } else if (instr->IsSub()) {
2425 __ Sub(dst, lhs, rhs);
2426 } else if (instr->IsRor()) {
2427 if (rhs.IsImmediate()) {
2428 uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
2429 __ Ror(dst, lhs, shift);
2430 } else {
2431 // Ensure the shift distance is in a register of the same size as the
2432 // result. If we are rotating a long and the shift distance originally
2433 // comes in a w register, no sxtw is needed to use it as an x register,
2434 // since shift distances are always masked with (reg_bits - 1).
2435 __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
2436 }
2437 } else {
2438 DCHECK(instr->IsXor());
2439 __ Eor(dst, lhs, rhs);
2440 }
2441 break;
2442 }
2443 case Primitive::kPrimFloat:
2444 case Primitive::kPrimDouble: {
2445 FPRegister dst = OutputFPRegister(instr);
2446 FPRegister lhs = InputFPRegisterAt(instr, 0);
2447 FPRegister rhs = InputFPRegisterAt(instr, 1);
2448 if (instr->IsAdd()) {
2449 __ Fadd(dst, lhs, rhs);
2450 } else if (instr->IsSub()) {
2451 __ Fsub(dst, lhs, rhs);
2452 } else {
2453 LOG(FATAL) << "Unexpected floating-point binary operation";
2454 }
2455 break;
2456 }
2457 default:
2458 LOG(FATAL) << "Unexpected binary operation type " << type;
2459 }
2460 }
2461
2462 void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
2463 DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2464
2465 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
2466 Primitive::Type type = instr->GetResultType();
2467 switch (type) {
2468 case Primitive::kPrimInt:
2469 case Primitive::kPrimLong: {
2470 locations->SetInAt(0, Location::RequiresRegister());
2471 locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
2472 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2473 break;
2474 }
2475 default:
2476 LOG(FATAL) << "Unexpected shift type " << type;
2477 }
2478 }
2479
2480 void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
2481 DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2482
2483 Primitive::Type type = instr->GetType();
2484 switch (type) {
2485 case Primitive::kPrimInt:
2486 case Primitive::kPrimLong: {
2487 Register dst = OutputRegister(instr);
2488 Register lhs = InputRegisterAt(instr, 0);
2489 Operand rhs = InputOperandAt(instr, 1);
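// Shift distances follow Java semantics: they are masked with
// kMaxIntShiftDistance (31) for ints and kMaxLongShiftDistance (63) for
// longs, so e.g. a constant int shift by 33 is emitted as a shift by 1.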
2490 if (rhs.IsImmediate()) {
2491 uint32_t shift_value = rhs.GetImmediate() &
2492 (type == Primitive::kPrimInt ? kMaxIntShiftDistance : kMaxLongShiftDistance);
2493 if (instr->IsShl()) {
2494 __ Lsl(dst, lhs, shift_value);
2495 } else if (instr->IsShr()) {
2496 __ Asr(dst, lhs, shift_value);
2497 } else {
2498 __ Lsr(dst, lhs, shift_value);
2499 }
2500 } else {
2501 Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();
2502
2503 if (instr->IsShl()) {
2504 __ Lsl(dst, lhs, rhs_reg);
2505 } else if (instr->IsShr()) {
2506 __ Asr(dst, lhs, rhs_reg);
2507 } else {
2508 __ Lsr(dst, lhs, rhs_reg);
2509 }
2510 }
2511 break;
2512 }
2513 default:
2514 LOG(FATAL) << "Unexpected shift operation type " << type;
2515 }
2516 }
2517
2518 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
2519 HandleBinaryOp(instruction);
2520 }
2521
2522 void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) {
2523 HandleBinaryOp(instruction);
2524 }
2525
2526 void LocationsBuilderARM64::VisitAnd(HAnd* instruction) {
2527 HandleBinaryOp(instruction);
2528 }
2529
2530 void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) {
2531 HandleBinaryOp(instruction);
2532 }
2533
2534 void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2535 DCHECK(Primitive::IsIntegralType(instr->GetType())) << instr->GetType();
2536 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
2537 locations->SetInAt(0, Location::RequiresRegister());
2538 // There is no immediate variant of negated bitwise instructions in AArch64.
2539 locations->SetInAt(1, Location::RequiresRegister());
2540 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2541 }
2542
2543 void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2544 Register dst = OutputRegister(instr);
2545 Register lhs = InputRegisterAt(instr, 0);
2546 Register rhs = InputRegisterAt(instr, 1);
2547
2548 switch (instr->GetOpKind()) {
2549 case HInstruction::kAnd:
2550 __ Bic(dst, lhs, rhs);
2551 break;
2552 case HInstruction::kOr:
2553 __ Orn(dst, lhs, rhs);
2554 break;
2555 case HInstruction::kXor:
2556 __ Eon(dst, lhs, rhs);
2557 break;
2558 default:
2559 LOG(FATAL) << "Unreachable";
2560 }
2561 }
2562
2563 void LocationsBuilderARM64::VisitDataProcWithShifterOp(
2564 HDataProcWithShifterOp* instruction) {
2565 DCHECK(instruction->GetType() == Primitive::kPrimInt ||
2566 instruction->GetType() == Primitive::kPrimLong);
2567 LocationSummary* locations =
2568 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
2569 if (instruction->GetInstrKind() == HInstruction::kNeg) {
2570 locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
2571 } else {
2572 locations->SetInAt(0, Location::RequiresRegister());
2573 }
2574 locations->SetInAt(1, Location::RequiresRegister());
2575 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2576 }
2577
2578 void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp(
2579 HDataProcWithShifterOp* instruction) {
2580 Primitive::Type type = instruction->GetType();
2581 HInstruction::InstructionKind kind = instruction->GetInstrKind();
2582 DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
2583 Register out = OutputRegister(instruction);
2584 Register left;
2585 if (kind != HInstruction::kNeg) {
2586 left = InputRegisterAt(instruction, 0);
2587 }
2588 // If this `HDataProcWithShifterOp` was created by merging a type conversion as the
2589 // shifter operand operation, the IR generating `right_reg` (input to the type
2590 // conversion) can have a different type from the current instruction's type,
2591 // so we manually indicate the type.
2592 Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
2593 Operand right_operand(0);
2594
2595 HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
2596 if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
2597 right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
2598 } else {
2599 right_operand = Operand(right_reg,
2600 helpers::ShiftFromOpKind(op_kind),
2601 instruction->GetShiftAmount());
2602 }
2603
2604 // Logical binary operations do not support extension operations in the
2605 // operand. Note that VIXL would still manage if one were passed, by
2606 // generating the extension as a separate instruction.
2607 // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
2608 DCHECK(!right_operand.IsExtendedRegister() ||
2609 (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor &&
2610 kind != HInstruction::kNeg));
2611 switch (kind) {
2612 case HInstruction::kAdd:
2613 __ Add(out, left, right_operand);
2614 break;
2615 case HInstruction::kAnd:
2616 __ And(out, left, right_operand);
2617 break;
2618 case HInstruction::kNeg:
2619 DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero());
2620 __ Neg(out, right_operand);
2621 break;
2622 case HInstruction::kOr:
2623 __ Orr(out, left, right_operand);
2624 break;
2625 case HInstruction::kSub:
2626 __ Sub(out, left, right_operand);
2627 break;
2628 case HInstruction::kXor:
2629 __ Eor(out, left, right_operand);
2630 break;
2631 default:
2632 LOG(FATAL) << "Unexpected operation kind: " << kind;
2633 UNREACHABLE();
2634 }
2635 }
2636
2637 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2638 LocationSummary* locations =
2639 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
2640 locations->SetInAt(0, Location::RequiresRegister());
2641 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
2642 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2643 }
2644
2645 void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2646 __ Add(OutputRegister(instruction),
2647 InputRegisterAt(instruction, 0),
2648 Operand(InputOperandAt(instruction, 1)));
2649 }
2650
2651 void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2652 LocationSummary* locations =
2653 new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
2654 HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2655 if (instr->GetOpKind() == HInstruction::kSub &&
2656 accumulator->IsConstant() &&
2657 accumulator->AsConstant()->IsArithmeticZero()) {
2658 // Don't allocate register for Mneg instruction.
2659 } else {
2660 locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
2661 Location::RequiresRegister());
2662 }
2663 locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
2664 locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
2665 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2666 }
2667
2668 void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2669 Register res = OutputRegister(instr);
2670 Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
2671 Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
2672
2673 // Avoid emitting code that could trigger Cortex A53's erratum 835769.
2674 // This fixup should be carried out for all multiply-accumulate instructions:
2675 // madd, msub, smaddl, smsubl, umaddl and umsubl.
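  // The erratum may be triggered when such an instruction directly follows a
  // memory access, so a NOP is inserted between the two in that case.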
2676 if (instr->GetType() == Primitive::kPrimLong &&
2677 codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
2678 MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
2679 vixl::aarch64::Instruction* prev =
2680 masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize;
2681 if (prev->IsLoadOrStore()) {
2682 // Make sure we emit only exactly one nop.
2683 ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2684 __ nop();
2685 }
2686 }
2687
2688 if (instr->GetOpKind() == HInstruction::kAdd) {
2689 Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2690 __ Madd(res, mul_left, mul_right, accumulator);
2691 } else {
2692 DCHECK(instr->GetOpKind() == HInstruction::kSub);
2693 HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2694 if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) {
2695 __ Mneg(res, mul_left, mul_right);
2696 } else {
2697 Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2698 __ Msub(res, mul_left, mul_right, accumulator);
2699 }
2700 }
2701 }
2702
2703 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
2704 bool object_array_get_with_read_barrier =
2705 kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
2706 LocationSummary* locations =
2707 new (GetGraph()->GetArena()) LocationSummary(instruction,
2708 object_array_get_with_read_barrier ?
2709 LocationSummary::kCallOnSlowPath :
2710 LocationSummary::kNoCall);
2711 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
2712 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
2713 // We need a temporary register for the read barrier marking slow
2714 // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier.
2715 if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
2716 !Runtime::Current()->UseJitCompilation() &&
2717 instruction->GetIndex()->IsConstant()) {
2718 // Array loads with constant index are treated as field loads.
2719 // If link-time thunks for the Baker read barrier are enabled, for AOT
2720 // constant index loads we need a temporary only if the offset is too big.
2721 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2722 uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
2723 offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot);
2724 if (offset >= kReferenceLoadMinFarOffset) {
2725 locations->AddTemp(FixedTempLocation());
2726 }
2727 } else {
2728 locations->AddTemp(Location::RequiresRegister());
2729 }
2730 }
2731 locations->SetInAt(0, Location::RequiresRegister());
2732 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2733 if (Primitive::IsFloatingPointType(instruction->GetType())) {
2734 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2735 } else {
2736 // The output overlaps in the case of an object array get with
2737 // read barriers enabled: we do not want the move to overwrite the
2738 // array's location, as we need it to emit the read barrier.
2739 locations->SetOut(
2740 Location::RequiresRegister(),
2741 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2742 }
2743 }
2744
2745 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
2746 Primitive::Type type = instruction->GetType();
2747 Register obj = InputRegisterAt(instruction, 0);
2748 LocationSummary* locations = instruction->GetLocations();
2749 Location index = locations->InAt(1);
2750 Location out = locations->Out();
2751 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2752 const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
2753 instruction->IsStringCharAt();
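  // With string compression, bit 0 of the String `count` field is the
  // compression flag (0 = 8-bit compressed chars, 1 = 16-bit chars) and the
  // character count is stored in the remaining upper bits.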
2754 MacroAssembler* masm = GetVIXLAssembler();
2755 UseScratchRegisterScope temps(masm);
2756
2757 // The read barrier instrumentation of object ArrayGet instructions
2758 // does not support the HIntermediateAddress instruction.
2759 DCHECK(!((type == Primitive::kPrimNot) &&
2760 instruction->GetArray()->IsIntermediateAddress() &&
2761 kEmitCompilerReadBarrier));
2762
2763 if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2764 // Object ArrayGet with Baker's read barrier case.
2765 // Note that a potential implicit null check is handled in the
2766 // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
2767 if (index.IsConstant()) {
2768 // Array load with a constant index can be treated as a field load.
2769 offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
2770 Location maybe_temp =
2771 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2772 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
2773 out,
2774 obj.W(),
2775 offset,
2776 maybe_temp,
2777 /* needs_null_check */ true,
2778 /* use_load_acquire */ false);
2779 } else {
2780 Register temp = WRegisterFrom(locations->GetTemp(0));
2781 codegen_->GenerateArrayLoadWithBakerReadBarrier(
2782 instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
2783 }
2784 } else {
2785 // General case.
2786 MemOperand source = HeapOperand(obj);
2787 Register length;
2788 if (maybe_compressed_char_at) {
2789 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2790 length = temps.AcquireW();
2791 {
2792 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2793 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2794
2795 if (instruction->GetArray()->IsIntermediateAddress()) {
2796 DCHECK_LT(count_offset, offset);
2797 int64_t adjusted_offset =
2798 static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
2799 // Note that `adjusted_offset` is negative, so this will be a LDUR.
2800 __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
2801 } else {
2802 __ Ldr(length, HeapOperand(obj, count_offset));
2803 }
2804 codegen_->MaybeRecordImplicitNullCheck(instruction);
2805 }
2806 }
2807 if (index.IsConstant()) {
2808 if (maybe_compressed_char_at) {
2809 vixl::aarch64::Label uncompressed_load, done;
2810 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2811 "Expecting 0=compressed, 1=uncompressed");
2812 __ Tbnz(length.W(), 0, &uncompressed_load);
2813 __ Ldrb(Register(OutputCPURegister(instruction)),
2814 HeapOperand(obj, offset + Int64ConstantFrom(index)));
2815 __ B(&done);
2816 __ Bind(&uncompressed_load);
2817 __ Ldrh(Register(OutputCPURegister(instruction)),
2818 HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1)));
2819 __ Bind(&done);
2820 } else {
2821 offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
2822 source = HeapOperand(obj, offset);
2823 }
2824 } else {
2825 Register temp = temps.AcquireSameSizeAs(obj);
2826 if (instruction->GetArray()->IsIntermediateAddress()) {
2827 // We do not need to compute the intermediate address from the array: the
2828 // input instruction has done it already. See the comment in
2829 // `TryExtractArrayAccessAddress()`.
2830 if (kIsDebugBuild) {
2831 HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
2832 DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
2833 }
2834 temp = obj;
2835 } else {
2836 __ Add(temp, obj, offset);
2837 }
2838 if (maybe_compressed_char_at) {
2839 vixl::aarch64::Label uncompressed_load, done;
2840 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2841 "Expecting 0=compressed, 1=uncompressed");
2842 __ Tbnz(length.W(), 0, &uncompressed_load);
2843 __ Ldrb(Register(OutputCPURegister(instruction)),
2844 HeapOperand(temp, XRegisterFrom(index), LSL, 0));
2845 __ B(&done);
2846 __ Bind(&uncompressed_load);
2847 __ Ldrh(Register(OutputCPURegister(instruction)),
2848 HeapOperand(temp, XRegisterFrom(index), LSL, 1));
2849 __ Bind(&done);
2850 } else {
2851 source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
2852 }
2853 }
2854 if (!maybe_compressed_char_at) {
2855 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2856 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2857 codegen_->Load(type, OutputCPURegister(instruction), source);
2858 codegen_->MaybeRecordImplicitNullCheck(instruction);
2859 }
2860
2861 if (type == Primitive::kPrimNot) {
2862 static_assert(
2863 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
2864 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
2865 Location obj_loc = locations->InAt(0);
2866 if (index.IsConstant()) {
2867 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
2868 } else {
2869 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
2870 }
2871 }
2872 }
2873 }
2874
2875 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
2876 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
2877 locations->SetInAt(0, Location::RequiresRegister());
2878 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2879 }
2880
2881 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
2882 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
2883 vixl::aarch64::Register out = OutputRegister(instruction);
2884 {
2885 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2886 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2887 __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
2888 codegen_->MaybeRecordImplicitNullCheck(instruction);
2889 }
2890 // Mask out compression flag from String's array length.
2891 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
2892 __ Lsr(out.W(), out.W(), 1u);
2893 }
2894 }
2895
2896 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
2897 Primitive::Type value_type = instruction->GetComponentType();
2898
2899 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
2900 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
2901 instruction,
2902 may_need_runtime_call_for_type_check ?
2903 LocationSummary::kCallOnSlowPath :
2904 LocationSummary::kNoCall);
2905 locations->SetInAt(0, Location::RequiresRegister());
2906 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2907 if (IsConstantZeroBitPattern(instruction->InputAt(2))) {
2908 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
2909 } else if (Primitive::IsFloatingPointType(value_type)) {
2910 locations->SetInAt(2, Location::RequiresFpuRegister());
2911 } else {
2912 locations->SetInAt(2, Location::RequiresRegister());
2913 }
2914 }
2915
2916 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
2917 Primitive::Type value_type = instruction->GetComponentType();
2918 LocationSummary* locations = instruction->GetLocations();
2919 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
2920 bool needs_write_barrier =
2921 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
2922
2923 Register array = InputRegisterAt(instruction, 0);
2924 CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2);
2925 CPURegister source = value;
2926 Location index = locations->InAt(1);
2927 size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
2928 MemOperand destination = HeapOperand(array);
2929 MacroAssembler* masm = GetVIXLAssembler();
2930
2931 if (!needs_write_barrier) {
2932 DCHECK(!may_need_runtime_call_for_type_check);
2933 if (index.IsConstant()) {
2934 offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
2935 destination = HeapOperand(array, offset);
2936 } else {
2937 UseScratchRegisterScope temps(masm);
2938 Register temp = temps.AcquireSameSizeAs(array);
2939 if (instruction->GetArray()->IsIntermediateAddress()) {
2940 // We do not need to compute the intermediate address from the array: the
2941 // input instruction has done it already. See the comment in
2942 // `TryExtractArrayAccessAddress()`.
2943 if (kIsDebugBuild) {
2944 HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
2945 DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
2946 }
2947 temp = array;
2948 } else {
2949 __ Add(temp, array, offset);
2950 }
2951 destination = HeapOperand(temp,
2952 XRegisterFrom(index),
2953 LSL,
2954 Primitive::ComponentSizeShift(value_type));
2955 }
2956 {
2957 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2958 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2959 codegen_->Store(value_type, value, destination);
2960 codegen_->MaybeRecordImplicitNullCheck(instruction);
2961 }
2962 } else {
2963 DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2964 vixl::aarch64::Label done;
2965 SlowPathCodeARM64* slow_path = nullptr;
2966 {
2967 // We use a block to end the scratch scope before the write barrier, thus
2968 // freeing the temporary registers so they can be used in `MarkGCCard`.
2969 UseScratchRegisterScope temps(masm);
2970 Register temp = temps.AcquireSameSizeAs(array);
2971 if (index.IsConstant()) {
2972 offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
2973 destination = HeapOperand(array, offset);
2974 } else {
2975 destination = HeapOperand(temp,
2976 XRegisterFrom(index),
2977 LSL,
2978 Primitive::ComponentSizeShift(value_type));
2979 }
2980
2981 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2982 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2983 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2984
2985 if (may_need_runtime_call_for_type_check) {
2986 slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction);
2987 codegen_->AddSlowPath(slow_path);
2988 if (instruction->GetValueCanBeNull()) {
2989 vixl::aarch64::Label non_zero;
2990 __ Cbnz(Register(value), &non_zero);
2991 if (!index.IsConstant()) {
2992 __ Add(temp, array, offset);
2993 }
2994 {
2995 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools
2996 // emitted.
2997 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2998 __ Str(wzr, destination);
2999 codegen_->MaybeRecordImplicitNullCheck(instruction);
3000 }
3001 __ B(&done);
3002 __ Bind(&non_zero);
3003 }
3004
3005 // Note that when Baker read barriers are enabled, the type
3006 // checks are performed without read barriers. This is fine,
3007 // even in the case where a class object is in the from-space
3008 // after the flip, as a comparison involving such a type would
3009 // not produce a false positive; it may of course produce a
3010 // false negative, in which case we would take the ArraySet
3011 // slow path.
3012
3013 Register temp2 = temps.AcquireSameSizeAs(array);
3014 // /* HeapReference<Class> */ temp = array->klass_
3015 {
3016 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
3017 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
3018 __ Ldr(temp, HeapOperand(array, class_offset));
3019 codegen_->MaybeRecordImplicitNullCheck(instruction);
3020 }
3021 GetAssembler()->MaybeUnpoisonHeapReference(temp);
3022
3023 // /* HeapReference<Class> */ temp = temp->component_type_
3024 __ Ldr(temp, HeapOperand(temp, component_offset));
3025 // /* HeapReference<Class> */ temp2 = value->klass_
3026 __ Ldr(temp2, HeapOperand(Register(value), class_offset));
3027 // If heap poisoning is enabled, no need to unpoison `temp`
3028 // nor `temp2`, as we are comparing two poisoned references.
3029 __ Cmp(temp, temp2);
3030 temps.Release(temp2);
3031
3032 if (instruction->StaticTypeOfArrayIsObjectArray()) {
3033 vixl::aarch64::Label do_put;
3034 __ B(eq, &do_put);
3035 // If heap poisoning is enabled, the `temp` reference has
3036 // not been unpoisoned yet; unpoison it now.
3037 GetAssembler()->MaybeUnpoisonHeapReference(temp);
3038
3039 // /* HeapReference<Class> */ temp = temp->super_class_
3040 __ Ldr(temp, HeapOperand(temp, super_offset));
3041 // If heap poisoning is enabled, no need to unpoison
3042 // `temp`, as we are comparing against null below.
3043 __ Cbnz(temp, slow_path->GetEntryLabel());
3044 __ Bind(&do_put);
3045 } else {
3046 __ B(ne, slow_path->GetEntryLabel());
3047 }
3048 }
3049
3050 if (kPoisonHeapReferences) {
3051 Register temp2 = temps.AcquireSameSizeAs(array);
3052 DCHECK(value.IsW());
3053 __ Mov(temp2, value.W());
3054 GetAssembler()->PoisonHeapReference(temp2);
3055 source = temp2;
3056 }
3057
3058 if (!index.IsConstant()) {
3059 __ Add(temp, array, offset);
3060 } else {
3061 // We no longer need the `temp` here so release it as the store below may
3062 // need a scratch register (if the constant index makes the offset too large)
3063 // and the poisoned `source` could be using the other scratch register.
3064 temps.Release(temp);
3065 }
3066 {
3067 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
3068 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
3069 __ Str(source, destination);
3070
3071 if (!may_need_runtime_call_for_type_check) {
3072 codegen_->MaybeRecordImplicitNullCheck(instruction);
3073 }
3074 }
3075 }
3076
3077 codegen_->MarkGCCard(array, value.W(), instruction->GetValueCanBeNull());
3078
3079 if (done.IsLinked()) {
3080 __ Bind(&done);
3081 }
3082
3083 if (slow_path != nullptr) {
3084 __ Bind(slow_path->GetExitLabel());
3085 }
3086 }
3087 }
3088
3089 void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
3090 RegisterSet caller_saves = RegisterSet::Empty();
3091 InvokeRuntimeCallingConvention calling_convention;
3092 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3093 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode()));
3094 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
3095 locations->SetInAt(0, Location::RequiresRegister());
3096 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
3097 }
3098
3099 void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
3100 BoundsCheckSlowPathARM64* slow_path =
3101 new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(instruction);
3102 codegen_->AddSlowPath(slow_path);
3103 __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
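  // The unsigned comparison (HS) catches both index >= length and negative
  // indices, which wrap around to large unsigned values.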
3104 __ B(slow_path->GetEntryLabel(), hs);
3105 }
3106
3107 void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
3108 LocationSummary* locations =
3109 new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
3110 locations->SetInAt(0, Location::RequiresRegister());
3111 if (check->HasUses()) {
3112 locations->SetOut(Location::SameAsFirstInput());
3113 }
3114 }
3115
3116 void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
3117 // We assume the class is not null.
3118 SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
3119 check->GetLoadClass(), check, check->GetDexPc(), true);
3120 codegen_->AddSlowPath(slow_path);
3121 GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
3122 }
3123
3124 static bool IsFloatingPointZeroConstant(HInstruction* inst) {
3125 return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero()))
3126 || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero()));
3127 }
3128
3129 void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) {
3130 FPRegister lhs_reg = InputFPRegisterAt(instruction, 0);
3131 Location rhs_loc = instruction->GetLocations()->InAt(1);
3132 if (rhs_loc.IsConstant()) {
3133 // 0.0 is the only immediate that can be encoded directly in
3134 // an FCMP instruction.
3135 //
3136 // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
3137 // specify that in a floating-point comparison, positive zero
3138 // and negative zero are considered equal, so we can use the
3139 // literal 0.0 for both cases here.
3140 //
3141 // Note however that some methods (Float.equal, Float.compare,
3142 // Float.compareTo, Double.equal, Double.compare,
3143 // Double.compareTo, Math.max, Math.min, StrictMath.max,
3144 // StrictMath.min) consider 0.0 to be (strictly) greater than
3145 // -0.0. So if we ever translate calls to these methods into a
3146 // HCompare instruction, we must handle the -0.0 case with
3147 // care here.
3148 DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant()));
3149 __ Fcmp(lhs_reg, 0.0);
3150 } else {
3151 __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1));
3152 }
3153 }
3154
3155 void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
3156 LocationSummary* locations =
3157 new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
3158 Primitive::Type in_type = compare->InputAt(0)->GetType();
3159 switch (in_type) {
3160 case Primitive::kPrimBoolean:
3161 case Primitive::kPrimByte:
3162 case Primitive::kPrimShort:
3163 case Primitive::kPrimChar:
3164 case Primitive::kPrimInt:
3165 case Primitive::kPrimLong: {
3166 locations->SetInAt(0, Location::RequiresRegister());
3167 locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
3168 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3169 break;
3170 }
3171 case Primitive::kPrimFloat:
3172 case Primitive::kPrimDouble: {
3173 locations->SetInAt(0, Location::RequiresFpuRegister());
3174 locations->SetInAt(1,
3175 IsFloatingPointZeroConstant(compare->InputAt(1))
3176 ? Location::ConstantLocation(compare->InputAt(1)->AsConstant())
3177 : Location::RequiresFpuRegister());
3178 locations->SetOut(Location::RequiresRegister());
3179 break;
3180 }
3181 default:
3182 LOG(FATAL) << "Unexpected type for compare operation " << in_type;
3183 }
3184 }
3185
3186 void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
3187 Primitive::Type in_type = compare->InputAt(0)->GetType();
3188
3189 // 0 if: left == right
3190 // 1 if: left > right
3191 // -1 if: left < right
3192 switch (in_type) {
3193 case Primitive::kPrimBoolean:
3194 case Primitive::kPrimByte:
3195 case Primitive::kPrimShort:
3196 case Primitive::kPrimChar:
3197 case Primitive::kPrimInt:
3198 case Primitive::kPrimLong: {
3199 Register result = OutputRegister(compare);
3200 Register left = InputRegisterAt(compare, 0);
3201 Operand right = InputOperandAt(compare, 1);
3202 __ Cmp(left, right);
3203 __ Cset(result, ne); // result == +1 if NE or 0 otherwise
3204 __ Cneg(result, result, lt); // result == -1 if LT or unchanged otherwise
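      // Combined effect: EQ leaves 0, GT leaves +1 (CSET sets 1, CNEG does not
      // fire), LT yields -1 (CSET sets 1, CNEG negates it).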
3205 break;
3206 }
3207 case Primitive::kPrimFloat:
3208 case Primitive::kPrimDouble: {
3209 Register result = OutputRegister(compare);
3210 GenerateFcmp(compare);
3211 __ Cset(result, ne);
3212 __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
3213 break;
3214 }
3215 default:
3216 LOG(FATAL) << "Unimplemented compare type " << in_type;
3217 }
3218 }
3219
3220 void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
3221 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
3222
3223 if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3224 locations->SetInAt(0, Location::RequiresFpuRegister());
3225 locations->SetInAt(1,
3226 IsFloatingPointZeroConstant(instruction->InputAt(1))
3227 ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant())
3228 : Location::RequiresFpuRegister());
3229 } else {
3230 // Integer cases.
3231 locations->SetInAt(0, Location::RequiresRegister());
3232 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
3233 }
3234
3235 if (!instruction->IsEmittedAtUseSite()) {
3236 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3237 }
3238 }
3239
3240 void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
3241 if (instruction->IsEmittedAtUseSite()) {
3242 return;
3243 }
3244
3245 LocationSummary* locations = instruction->GetLocations();
3246 Register res = RegisterFrom(locations->Out(), instruction->GetType());
3247 IfCondition if_cond = instruction->GetCondition();
3248
3249 if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3250 GenerateFcmp(instruction);
3251 __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
3252 } else {
3253 // Integer cases.
3254 Register lhs = InputRegisterAt(instruction, 0);
3255 Operand rhs = InputOperandAt(instruction, 1);
3256 __ Cmp(lhs, rhs);
3257 __ Cset(res, ARM64Condition(if_cond));
3258 }
3259 }
3260
3261 #define FOR_EACH_CONDITION_INSTRUCTION(M) \
3262 M(Equal) \
3263 M(NotEqual) \
3264 M(LessThan) \
3265 M(LessThanOrEqual) \
3266 M(GreaterThan) \
3267 M(GreaterThanOrEqual) \
3268 M(Below) \
3269 M(BelowOrEqual) \
3270 M(Above) \
3271 M(AboveOrEqual)
3272 #define DEFINE_CONDITION_VISITORS(Name) \
3273 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); } \
3274 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
3275 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
3276 #undef DEFINE_CONDITION_VISITORS
3277 #undef FOR_EACH_CONDITION_INSTRUCTION
3278
3279 void InstructionCodeGeneratorARM64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3280 DCHECK(instruction->IsDiv() || instruction->IsRem());
3281
3282 LocationSummary* locations = instruction->GetLocations();
3283 Location second = locations->InAt(1);
3284 DCHECK(second.IsConstant());
3285
3286 Register out = OutputRegister(instruction);
3287 Register dividend = InputRegisterAt(instruction, 0);
3288 int64_t imm = Int64FromConstant(second.GetConstant());
3289 DCHECK(imm == 1 || imm == -1);
3290
3291 if (instruction->IsRem()) {
3292 __ Mov(out, 0);
3293 } else {
3294 if (imm == 1) {
3295 __ Mov(out, dividend);
3296 } else {
3297 __ Neg(out, dividend);
3298 }
3299 }
3300 }
3301
3302 void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
3303 DCHECK(instruction->IsDiv() || instruction->IsRem());
3304
3305 LocationSummary* locations = instruction->GetLocations();
3306 Location second = locations->InAt(1);
3307 DCHECK(second.IsConstant());
3308
3309 Register out = OutputRegister(instruction);
3310 Register dividend = InputRegisterAt(instruction, 0);
3311 int64_t imm = Int64FromConstant(second.GetConstant());
3312 uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
3313 int ctz_imm = CTZ(abs_imm);
3314
3315 UseScratchRegisterScope temps(GetVIXLAssembler());
3316 Register temp = temps.AcquireSameSizeAs(out);
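  // For division, bias negative dividends by |imm| - 1 so that the arithmetic
  // shift rounds toward zero; negate afterwards if the divisor is negative.
  // For the remainder, `temp` holds a sign-derived correction applied around
  // the masking with |imm| - 1.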
3317
3318 if (instruction->IsDiv()) {
3319 __ Add(temp, dividend, abs_imm - 1);
3320 __ Cmp(dividend, 0);
3321 __ Csel(out, temp, dividend, lt);
3322 if (imm > 0) {
3323 __ Asr(out, out, ctz_imm);
3324 } else {
3325 __ Neg(out, Operand(out, ASR, ctz_imm));
3326 }
3327 } else {
3328 int bits = instruction->GetResultType() == Primitive::kPrimInt ? 32 : 64;
3329 __ Asr(temp, dividend, bits - 1);
3330 __ Lsr(temp, temp, bits - ctz_imm);
3331 __ Add(out, dividend, temp);
3332 __ And(out, out, abs_imm - 1);
3333 __ Sub(out, out, temp);
3334 }
3335 }
3336
3337 void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3338 DCHECK(instruction->IsDiv() || instruction->IsRem());
3339
3340 LocationSummary* locations = instruction->GetLocations();
3341 Location second = locations->InAt(1);
3342 DCHECK(second.IsConstant());
3343
3344 Register out = OutputRegister(instruction);
3345 Register dividend = InputRegisterAt(instruction, 0);
3346 int64_t imm = Int64FromConstant(second.GetConstant());
3347
3348 Primitive::Type type = instruction->GetResultType();
3349 DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
3350
3351 int64_t magic;
3352 int shift;
3353 CalculateMagicAndShiftForDivRem(imm, type == Primitive::kPrimLong /* is_long */, &magic, &shift);
3354
3355 UseScratchRegisterScope temps(GetVIXLAssembler());
3356 Register temp = temps.AcquireSameSizeAs(out);
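  // Standard "magic number" division: take the high half of dividend * magic,
  // correct for the signs of `imm` and `magic`, arithmetic-shift by `shift`,
  // then subtract the sign extension of the intermediate result (i.e. add one
  // when it is negative) so the quotient rounds toward zero.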
3357
3358 // temp = get_high(dividend * magic)
3359 __ Mov(temp, magic);
3360 if (type == Primitive::kPrimLong) {
3361 __ Smulh(temp, dividend, temp);
3362 } else {
3363 __ Smull(temp.X(), dividend, temp);
3364 __ Lsr(temp.X(), temp.X(), 32);
3365 }
3366
3367 if (imm > 0 && magic < 0) {
3368 __ Add(temp, temp, dividend);
3369 } else if (imm < 0 && magic > 0) {
3370 __ Sub(temp, temp, dividend);
3371 }
3372
3373 if (shift != 0) {
3374 __ Asr(temp, temp, shift);
3375 }
3376
3377 if (instruction->IsDiv()) {
3378 __ Sub(out, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31));
3379 } else {
3380 __ Sub(temp, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31));
3381 // TODO: Strength reduction for msub.
3382 Register temp_imm = temps.AcquireSameSizeAs(out);
3383 __ Mov(temp_imm, imm);
3384 __ Msub(out, temp, temp_imm, dividend);
3385 }
3386 }
3387
3388 void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3389 DCHECK(instruction->IsDiv() || instruction->IsRem());
3390 Primitive::Type type = instruction->GetResultType();
3391 DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
3392
3393 LocationSummary* locations = instruction->GetLocations();
3394 Register out = OutputRegister(instruction);
3395 Location second = locations->InAt(1);
3396
3397 if (second.IsConstant()) {
3398 int64_t imm = Int64FromConstant(second.GetConstant());
3399
3400 if (imm == 0) {
3401 // Do not generate anything. DivZeroCheck would prevent any code to be executed.
3402 } else if (imm == 1 || imm == -1) {
3403 DivRemOneOrMinusOne(instruction);
3404 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
3405 DivRemByPowerOfTwo(instruction);
3406 } else {
3407 DCHECK(imm <= -2 || imm >= 2);
3408 GenerateDivRemWithAnyConstant(instruction);
3409 }
3410 } else {
3411 Register dividend = InputRegisterAt(instruction, 0);
3412 Register divisor = InputRegisterAt(instruction, 1);
3413 if (instruction->IsDiv()) {
3414 __ Sdiv(out, dividend, divisor);
3415 } else {
3416 UseScratchRegisterScope temps(GetVIXLAssembler());
3417 Register temp = temps.AcquireSameSizeAs(out);
3418 __ Sdiv(temp, dividend, divisor);
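        // remainder = dividend - (dividend / divisor) * divisor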
3419 __ Msub(out, temp, divisor, dividend);
3420 }
3421 }
3422 }
3423
3424 void LocationsBuilderARM64::VisitDiv(HDiv* div) {
3425 LocationSummary* locations =
3426 new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
3427 switch (div->GetResultType()) {
3428 case Primitive::kPrimInt:
3429 case Primitive::kPrimLong:
3430 locations->SetInAt(0, Location::RequiresRegister());
3431 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3432 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3433 break;
3434
3435 case Primitive::kPrimFloat:
3436 case Primitive::kPrimDouble:
3437 locations->SetInAt(0, Location::RequiresFpuRegister());
3438 locations->SetInAt(1, Location::RequiresFpuRegister());
3439 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3440 break;
3441
3442 default:
3443 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3444 }
3445 }
3446
3447 void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
3448 Primitive::Type type = div->GetResultType();
3449 switch (type) {
3450 case Primitive::kPrimInt:
3451 case Primitive::kPrimLong:
3452 GenerateDivRemIntegral(div);
3453 break;
3454
3455 case Primitive::kPrimFloat:
3456 case Primitive::kPrimDouble:
3457 __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1));
3458 break;
3459
3460 default:
3461 LOG(FATAL) << "Unexpected div type " << type;
3462 }
3463 }
3464
3465 void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3466 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3467 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
3468 }
3469
3470 void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3471 SlowPathCodeARM64* slow_path =
3472 new (GetGraph()->GetArena()) DivZeroCheckSlowPathARM64(instruction);
3473 codegen_->AddSlowPath(slow_path);
3474 Location value = instruction->GetLocations()->InAt(0);
3475
3476 Primitive::Type type = instruction->GetType();
3477
3478 if (!Primitive::IsIntegralType(type)) {
3479 LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
3480 return;
3481 }
3482
3483 if (value.IsConstant()) {
3484 int64_t divisor = Int64ConstantFrom(value);
3485 if (divisor == 0) {
3486 __ B(slow_path->GetEntryLabel());
3487 } else {
3488 // A division by a non-zero constant is valid. We don't need to perform
3489 // any check, so simply fall through.
3490 }
3491 } else {
3492 __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
3493 }
3494 }
3495
3496 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
3497 LocationSummary* locations =
3498 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
3499 locations->SetOut(Location::ConstantLocation(constant));
3500 }
3501
3502 void InstructionCodeGeneratorARM64::VisitDoubleConstant(
3503 HDoubleConstant* constant ATTRIBUTE_UNUSED) {
3504 // Will be generated at use site.
3505 }
3506
3507 void LocationsBuilderARM64::VisitExit(HExit* exit) {
3508 exit->SetLocations(nullptr);
3509 }
3510
3511 void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
3512 }
3513
3514 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
3515 LocationSummary* locations =
3516 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
3517 locations->SetOut(Location::ConstantLocation(constant));
3518 }
3519
3520 void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
3521 // Will be generated at use site.
3522 }
3523
3524 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
3525 DCHECK(!successor->IsExitBlock());
3526 HBasicBlock* block = got->GetBlock();
3527 HInstruction* previous = got->GetPrevious();
3528 HLoopInformation* info = block->GetLoopInformation();
3529
3530 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
3531 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
3532 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
3533 return;
3534 }
3535 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
3536 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
3537 }
3538 if (!codegen_->GoesToNextBlock(block, successor)) {
3539 __ B(codegen_->GetLabelOf(successor));
3540 }
3541 }
3542
3543 void LocationsBuilderARM64::VisitGoto(HGoto* got) {
3544 got->SetLocations(nullptr);
3545 }
3546
3547 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
3548 HandleGoto(got, got->GetSuccessor());
3549 }
3550
3551 void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3552 try_boundary->SetLocations(nullptr);
3553 }
3554
3555 void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3556 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
3557 if (!successor->IsExitBlock()) {
3558 HandleGoto(try_boundary, successor);
3559 }
3560 }
3561
3562 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
3563 size_t condition_input_index,
3564 vixl::aarch64::Label* true_target,
3565 vixl::aarch64::Label* false_target) {
3566 HInstruction* cond = instruction->InputAt(condition_input_index);
3567
3568 if (true_target == nullptr && false_target == nullptr) {
3569 // Nothing to do. The code always falls through.
3570 return;
3571 } else if (cond->IsIntConstant()) {
3572 // Constant condition, statically compared against "true" (integer value 1).
3573 if (cond->AsIntConstant()->IsTrue()) {
3574 if (true_target != nullptr) {
3575 __ B(true_target);
3576 }
3577 } else {
3578 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
3579 if (false_target != nullptr) {
3580 __ B(false_target);
3581 }
3582 }
3583 return;
3584 }
3585
3586 // The following code generates these patterns:
3587 // (1) true_target == nullptr && false_target != nullptr
3588 // - opposite condition true => branch to false_target
3589 // (2) true_target != nullptr && false_target == nullptr
3590 // - condition true => branch to true_target
3591 // (3) true_target != nullptr && false_target != nullptr
3592 // - condition true => branch to true_target
3593 // - branch to false_target
3594 if (IsBooleanValueOrMaterializedCondition(cond)) {
3595 // The condition instruction has been materialized, compare the output to 0.
3596 Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
3597 DCHECK(cond_val.IsRegister());
3598 if (true_target == nullptr) {
3599 __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
3600 } else {
3601 __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
3602 }
3603 } else {
3604 // The condition instruction has not been materialized, use its inputs as
3605 // the comparison and its condition as the branch condition.
3606 HCondition* condition = cond->AsCondition();
3607
3608 Primitive::Type type = condition->InputAt(0)->GetType();
3609 if (Primitive::IsFloatingPointType(type)) {
3610 GenerateFcmp(condition);
3611 if (true_target == nullptr) {
3612 IfCondition opposite_condition = condition->GetOppositeCondition();
3613 __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
3614 } else {
3615 __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
3616 }
3617 } else {
3618 // Integer cases.
3619 Register lhs = InputRegisterAt(condition, 0);
3620 Operand rhs = InputOperandAt(condition, 1);
3621
3622 Condition arm64_cond;
3623 vixl::aarch64::Label* non_fallthrough_target;
3624 if (true_target == nullptr) {
3625 arm64_cond = ARM64Condition(condition->GetOppositeCondition());
3626 non_fallthrough_target = false_target;
3627 } else {
3628 arm64_cond = ARM64Condition(condition->GetCondition());
3629 non_fallthrough_target = true_target;
3630 }
3631
3632 if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
3633 rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
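      // A comparison against zero can be fused into a single compare-and-branch
      // (CBZ/CBNZ) or sign-bit test (TBZ/TBNZ), avoiding the CMP + B.cond pair.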
3634 switch (arm64_cond) {
3635 case eq:
3636 __ Cbz(lhs, non_fallthrough_target);
3637 break;
3638 case ne:
3639 __ Cbnz(lhs, non_fallthrough_target);
3640 break;
3641 case lt:
3642 // Test the sign bit and branch accordingly.
3643 __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3644 break;
3645 case ge:
3646 // Test the sign bit and branch accordingly.
3647 __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3648 break;
3649 default:
3650 // Without the `static_cast` the compiler throws an error for
3651 // `-Werror=sign-promo`.
3652 LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
3653 }
3654 } else {
3655 __ Cmp(lhs, rhs);
3656 __ B(arm64_cond, non_fallthrough_target);
3657 }
3658 }
3659 }
3660
3661 // If neither branch falls through (case 3), the conditional branch to `true_target`
3662 // was already emitted (case 2) and we need to emit a jump to `false_target`.
3663 if (true_target != nullptr && false_target != nullptr) {
3664 __ B(false_target);
3665 }
3666 }
3667
3668 void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
3669 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
3670 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3671 locations->SetInAt(0, Location::RequiresRegister());
3672 }
3673 }
3674
3675 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
3676 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
3677 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
3678 vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
3679 if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
3680 true_target = nullptr;
3681 }
3682 vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
3683 if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
3684 false_target = nullptr;
3685 }
3686 GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
3687 }
3688
3689 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3690 LocationSummary* locations = new (GetGraph()->GetArena())
3691 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
3692 InvokeRuntimeCallingConvention calling_convention;
3693 RegisterSet caller_saves = RegisterSet::Empty();
3694 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3695 locations->SetCustomSlowPathCallerSaves(caller_saves);
3696 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
3697 locations->SetInAt(0, Location::RequiresRegister());
3698 }
3699 }
3700
3701 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3702 SlowPathCodeARM64* slow_path =
3703 deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
3704 GenerateTestAndBranch(deoptimize,
3705 /* condition_input_index */ 0,
3706 slow_path->GetEntryLabel(),
3707 /* false_target */ nullptr);
3708 }
3709
3710 void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3711 LocationSummary* locations = new (GetGraph()->GetArena())
3712 LocationSummary(flag, LocationSummary::kNoCall);
3713 locations->SetOut(Location::RequiresRegister());
3714 }
3715
3716 void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3717 __ Ldr(OutputRegister(flag),
3718 MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
3719 }
3720
3721 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
3722 return condition->IsCondition() &&
3723 Primitive::IsFloatingPointType(condition->InputAt(0)->GetType());
3724 }
3725
3726 static inline Condition GetConditionForSelect(HCondition* condition) {
3727 IfCondition cond = condition->AsCondition()->GetCondition();
3728 return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
3729 : ARM64Condition(cond);
3730 }
3731
3732 void LocationsBuilderARM64::VisitSelect(HSelect* select) {
3733 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
3734 if (Primitive::IsFloatingPointType(select->GetType())) {
3735 locations->SetInAt(0, Location::RequiresFpuRegister());
3736 locations->SetInAt(1, Location::RequiresFpuRegister());
3737 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3738 } else {
3739 HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
3740 HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
3741 bool is_true_value_constant = cst_true_value != nullptr;
3742 bool is_false_value_constant = cst_false_value != nullptr;
3743 // Ask VIXL whether we should synthesize constants in registers.
3744 // We give an arbitrary register to VIXL when dealing with non-constant inputs.
3745 Operand true_op = is_true_value_constant ?
3746 Operand(Int64FromConstant(cst_true_value)) : Operand(x1);
3747 Operand false_op = is_false_value_constant ?
3748 Operand(Int64FromConstant(cst_false_value)) : Operand(x2);
3749 bool true_value_in_register = false;
3750 bool false_value_in_register = false;
3751 MacroAssembler::GetCselSynthesisInformation(
3752 x0, true_op, false_op, &true_value_in_register, &false_value_in_register);
3753 true_value_in_register |= !is_true_value_constant;
3754 false_value_in_register |= !is_false_value_constant;
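    // A constant input keeps a constant location only when VIXL reports that it
    // can be synthesized directly by the CSEL family (typically 0, 1 or -1 via
    // CSINC/CSINV/CSNEG with the zero register); otherwise it needs a register.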
3755
3756 locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister()
3757 : Location::ConstantLocation(cst_true_value));
3758 locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
3759 : Location::ConstantLocation(cst_false_value));
3760 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3761 }
3762
3763 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
3764 locations->SetInAt(2, Location::RequiresRegister());
3765 }
3766 }
3767
3768 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
3769 HInstruction* cond = select->GetCondition();
3770 Condition csel_cond;
3771
3772 if (IsBooleanValueOrMaterializedCondition(cond)) {
3773 if (cond->IsCondition() && cond->GetNext() == select) {
3774 // Use the condition flags set by the previous instruction.
3775 csel_cond = GetConditionForSelect(cond->AsCondition());
3776 } else {
3777 __ Cmp(InputRegisterAt(select, 2), 0);
3778 csel_cond = ne;
3779 }
3780 } else if (IsConditionOnFloatingPointValues(cond)) {
3781 GenerateFcmp(cond);
3782 csel_cond = GetConditionForSelect(cond->AsCondition());
3783 } else {
3784 __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
3785 csel_cond = GetConditionForSelect(cond->AsCondition());
3786 }
3787
3788 if (Primitive::IsFloatingPointType(select->GetType())) {
3789 __ Fcsel(OutputFPRegister(select),
3790 InputFPRegisterAt(select, 1),
3791 InputFPRegisterAt(select, 0),
3792 csel_cond);
3793 } else {
3794 __ Csel(OutputRegister(select),
3795 InputOperandAt(select, 1),
3796 InputOperandAt(select, 0),
3797 csel_cond);
3798 }
3799 }
3800
3801 void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
3802 new (GetGraph()->GetArena()) LocationSummary(info);
3803 }
3804
3805 void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) {
3806 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
3807 }
3808
3809 void CodeGeneratorARM64::GenerateNop() {
3810 __ Nop();
3811 }
3812
3813 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3814 HandleFieldGet(instruction, instruction->GetFieldInfo());
3815 }
3816
3817 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3818 HandleFieldGet(instruction, instruction->GetFieldInfo());
3819 }
3820
3821 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3822 HandleFieldSet(instruction);
3823 }
3824
3825 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3826 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
3827 }
3828
3829 // Temp is used for read barrier.
3830 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
3831 if (kEmitCompilerReadBarrier &&
3832 (kUseBakerReadBarrier ||
3833 type_check_kind == TypeCheckKind::kAbstractClassCheck ||
3834 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
3835 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
3836 return 1;
3837 }
3838 return 0;
3839 }
3840
3841 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
3842 // interface pointer, one for loading the current interface.
3843 // The other checks have one temp for loading the object's class.
3844 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
3845 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
3846 return 3;
3847 }
3848 return 1 + NumberOfInstanceOfTemps(type_check_kind);
3849 }
3850
3851 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
3852 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
3853 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3854 bool baker_read_barrier_slow_path = false;
3855 switch (type_check_kind) {
3856 case TypeCheckKind::kExactCheck:
3857 case TypeCheckKind::kAbstractClassCheck:
3858 case TypeCheckKind::kClassHierarchyCheck:
3859 case TypeCheckKind::kArrayObjectCheck:
3860 call_kind =
3861 kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
3862 baker_read_barrier_slow_path = kUseBakerReadBarrier;
3863 break;
3864 case TypeCheckKind::kArrayCheck:
3865 case TypeCheckKind::kUnresolvedCheck:
3866 case TypeCheckKind::kInterfaceCheck:
3867 call_kind = LocationSummary::kCallOnSlowPath;
3868 break;
3869 }
3870
3871 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
3872 if (baker_read_barrier_slow_path) {
3873 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
3874 }
3875 locations->SetInAt(0, Location::RequiresRegister());
3876 locations->SetInAt(1, Location::RequiresRegister());
3877 // The "out" register is used as a temporary, so it overlaps with the inputs.
3878 // Note that TypeCheckSlowPathARM64 uses this register too.
3879 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3880 // Add temps if necessary for read barriers.
3881 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
3882 }
3883
3884 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
3885 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3886 LocationSummary* locations = instruction->GetLocations();
3887 Location obj_loc = locations->InAt(0);
3888 Register obj = InputRegisterAt(instruction, 0);
3889 Register cls = InputRegisterAt(instruction, 1);
3890 Location out_loc = locations->Out();
3891 Register out = OutputRegister(instruction);
3892 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
3893 DCHECK_LE(num_temps, 1u);
3894 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
3895 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3896 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
3897 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
3898 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
3899
3900 vixl::aarch64::Label done, zero;
3901 SlowPathCodeARM64* slow_path = nullptr;
3902
3903 // Return 0 if `obj` is null.
3904 // Avoid null check if we know `obj` is not null.
3905 if (instruction->MustDoNullCheck()) {
3906 __ Cbz(obj, &zero);
3907 }
3908
3909 switch (type_check_kind) {
3910 case TypeCheckKind::kExactCheck: {
3911 // /* HeapReference<Class> */ out = obj->klass_
3912 GenerateReferenceLoadTwoRegisters(instruction,
3913 out_loc,
3914 obj_loc,
3915 class_offset,
3916 maybe_temp_loc,
3917 kCompilerReadBarrierOption);
3918 __ Cmp(out, cls);
3919 __ Cset(out, eq);
3920 if (zero.IsLinked()) {
3921 __ B(&done);
3922 }
3923 break;
3924 }
3925
3926 case TypeCheckKind::kAbstractClassCheck: {
3927 // /* HeapReference<Class> */ out = obj->klass_
3928 GenerateReferenceLoadTwoRegisters(instruction,
3929 out_loc,
3930 obj_loc,
3931 class_offset,
3932 maybe_temp_loc,
3933 kCompilerReadBarrierOption);
3934 // If the class is abstract, we eagerly fetch the super class of the
3935 // object to avoid doing a comparison we know will fail.
3936 vixl::aarch64::Label loop, success;
3937 __ Bind(&loop);
3938 // /* HeapReference<Class> */ out = out->super_class_
3939 GenerateReferenceLoadOneRegister(instruction,
3940 out_loc,
3941 super_offset,
3942 maybe_temp_loc,
3943 kCompilerReadBarrierOption);
3944 // If `out` is null, we use it for the result, and jump to `done`.
3945 __ Cbz(out, &done);
3946 __ Cmp(out, cls);
3947 __ B(ne, &loop);
3948 __ Mov(out, 1);
3949 if (zero.IsLinked()) {
3950 __ B(&done);
3951 }
3952 break;
3953 }
3954
3955 case TypeCheckKind::kClassHierarchyCheck: {
3956 // /* HeapReference<Class> */ out = obj->klass_
3957 GenerateReferenceLoadTwoRegisters(instruction,
3958 out_loc,
3959 obj_loc,
3960 class_offset,
3961 maybe_temp_loc,
3962 kCompilerReadBarrierOption);
3963 // Walk over the class hierarchy to find a match.
3964 vixl::aarch64::Label loop, success;
3965 __ Bind(&loop);
3966 __ Cmp(out, cls);
3967 __ B(eq, &success);
3968 // /* HeapReference<Class> */ out = out->super_class_
3969 GenerateReferenceLoadOneRegister(instruction,
3970 out_loc,
3971 super_offset,
3972 maybe_temp_loc,
3973 kCompilerReadBarrierOption);
3974 __ Cbnz(out, &loop);
3975 // If `out` is null, we use it for the result, and jump to `done`.
3976 __ B(&done);
3977 __ Bind(&success);
3978 __ Mov(out, 1);
3979 if (zero.IsLinked()) {
3980 __ B(&done);
3981 }
3982 break;
3983 }
3984
3985 case TypeCheckKind::kArrayObjectCheck: {
3986 // /* HeapReference<Class> */ out = obj->klass_
3987 GenerateReferenceLoadTwoRegisters(instruction,
3988 out_loc,
3989 obj_loc,
3990 class_offset,
3991 maybe_temp_loc,
3992 kCompilerReadBarrierOption);
3993 // Do an exact check.
3994 vixl::aarch64::Label exact_check;
3995 __ Cmp(out, cls);
3996 __ B(eq, &exact_check);
3997 // Otherwise, we need to check that the object's class is a non-primitive array.
3998 // /* HeapReference<Class> */ out = out->component_type_
3999 GenerateReferenceLoadOneRegister(instruction,
4000 out_loc,
4001 component_offset,
4002 maybe_temp_loc,
4003 kCompilerReadBarrierOption);
4004 // If `out` is null, we use it for the result, and jump to `done`.
4005 __ Cbz(out, &done);
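      // The component type is non-null, so the object is an array. Check that the component
      // type is not a primitive type by loading its 16-bit primitive type field; zero
      // (kPrimNot) denotes a reference type.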
4006 __ Ldrh(out, HeapOperand(out, primitive_offset));
4007 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4008 __ Cbnz(out, &zero);
4009 __ Bind(&exact_check);
4010 __ Mov(out, 1);
4011 __ B(&done);
4012 break;
4013 }
4014
4015 case TypeCheckKind::kArrayCheck: {
4016 // No read barrier since the slow path will retry upon failure.
4017 // /* HeapReference<Class> */ out = obj->klass_
4018 GenerateReferenceLoadTwoRegisters(instruction,
4019 out_loc,
4020 obj_loc,
4021 class_offset,
4022 maybe_temp_loc,
4023 kWithoutReadBarrier);
4024 __ Cmp(out, cls);
4025 DCHECK(locations->OnlyCallsOnSlowPath());
4026 slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
4027 /* is_fatal */ false);
4028 codegen_->AddSlowPath(slow_path);
4029 __ B(ne, slow_path->GetEntryLabel());
4030 __ Mov(out, 1);
4031 if (zero.IsLinked()) {
4032 __ B(&done);
4033 }
4034 break;
4035 }
4036
4037 case TypeCheckKind::kUnresolvedCheck:
4038 case TypeCheckKind::kInterfaceCheck: {
4039 // Note that we indeed only call on slow path, but we always go
4040 // into the slow path for the unresolved and interface check
4041 // cases.
4042 //
4043 // We cannot directly call the InstanceofNonTrivial runtime
4044 // entry point without resorting to a type checking slow path
4045 // here (i.e. by calling InvokeRuntime directly), as it would
4046 // require assigning fixed registers for the inputs of this
4047 // HInstanceOf instruction (following the runtime calling
4048 // convention), which might be cluttered by the potential first
4049 // read barrier emission at the beginning of this method.
4050 //
4051 // TODO: Introduce a new runtime entry point taking the object
4052 // to test (instead of its class) as argument, and let it deal
4053 // with the read barrier issues. This will let us refactor this
4054 // case of the `switch` code as it was previously (with a direct
4055 // call to the runtime not using a type checking slow path).
4056 // This should also be beneficial for the other cases above.
4057 DCHECK(locations->OnlyCallsOnSlowPath());
4058 slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
4059 /* is_fatal */ false);
4060 codegen_->AddSlowPath(slow_path);
4061 __ B(slow_path->GetEntryLabel());
4062 if (zero.IsLinked()) {
4063 __ B(&done);
4064 }
4065 break;
4066 }
4067 }
4068
4069 if (zero.IsLinked()) {
4070 __ Bind(&zero);
4071 __ Mov(out, 0);
4072 }
4073
4074 if (done.IsLinked()) {
4075 __ Bind(&done);
4076 }
4077
4078 if (slow_path != nullptr) {
4079 __ Bind(slow_path->GetExitLabel());
4080 }
4081 }
4082
4083 void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
4084 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
4085 bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
4086
4087 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4088 switch (type_check_kind) {
4089 case TypeCheckKind::kExactCheck:
4090 case TypeCheckKind::kAbstractClassCheck:
4091 case TypeCheckKind::kClassHierarchyCheck:
4092 case TypeCheckKind::kArrayObjectCheck:
4093 call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
4094 LocationSummary::kCallOnSlowPath :
4095 LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
4096 break;
4097 case TypeCheckKind::kArrayCheck:
4098 case TypeCheckKind::kUnresolvedCheck:
4099 case TypeCheckKind::kInterfaceCheck:
4100 call_kind = LocationSummary::kCallOnSlowPath;
4101 break;
4102 }
4103
4104 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
4105 locations->SetInAt(0, Location::RequiresRegister());
4106 locations->SetInAt(1, Location::RequiresRegister());
4107 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
4108 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
4109 }
4110
4111 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
4112 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4113 LocationSummary* locations = instruction->GetLocations();
4114 Location obj_loc = locations->InAt(0);
4115 Register obj = InputRegisterAt(instruction, 0);
4116 Register cls = InputRegisterAt(instruction, 1);
4117 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
4118 DCHECK_GE(num_temps, 1u);
4119 DCHECK_LE(num_temps, 3u);
4120 Location temp_loc = locations->GetTemp(0);
4121 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
4122 Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
4123 Register temp = WRegisterFrom(temp_loc);
4124 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4125 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4126 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4127 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
4128 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
4129 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
4130 const uint32_t object_array_data_offset =
4131 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
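  // The iftable, array length and object array data offsets above are only needed by the
  // kInterfaceCheck path below.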
4132
4133 bool is_type_check_slow_path_fatal = false;
4134 // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases
4135 // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding
4136 // read barriers is done for performance and code size reasons.
4137 if (!kEmitCompilerReadBarrier) {
4138 is_type_check_slow_path_fatal =
4139 (type_check_kind == TypeCheckKind::kExactCheck ||
4140 type_check_kind == TypeCheckKind::kAbstractClassCheck ||
4141 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
4142 type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
4143 !instruction->CanThrowIntoCatchBlock();
4144 }
4145 SlowPathCodeARM64* type_check_slow_path =
4146 new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
4147 is_type_check_slow_path_fatal);
4148 codegen_->AddSlowPath(type_check_slow_path);
4149
4150 vixl::aarch64::Label done;
4151 // Avoid null check if we know obj is not null.
4152 if (instruction->MustDoNullCheck()) {
4153 __ Cbz(obj, &done);
4154 }
4155
4156 switch (type_check_kind) {
4157 case TypeCheckKind::kExactCheck:
4158 case TypeCheckKind::kArrayCheck: {
4159 // /* HeapReference<Class> */ temp = obj->klass_
4160 GenerateReferenceLoadTwoRegisters(instruction,
4161 temp_loc,
4162 obj_loc,
4163 class_offset,
4164 maybe_temp2_loc,
4165 kWithoutReadBarrier);
4166
4167 __ Cmp(temp, cls);
4168 // Jump to slow path for throwing the exception or doing a
4169 // more involved array check.
4170 __ B(ne, type_check_slow_path->GetEntryLabel());
4171 break;
4172 }
4173
4174 case TypeCheckKind::kAbstractClassCheck: {
4175 // /* HeapReference<Class> */ temp = obj->klass_
4176 GenerateReferenceLoadTwoRegisters(instruction,
4177 temp_loc,
4178 obj_loc,
4179 class_offset,
4180 maybe_temp2_loc,
4181 kWithoutReadBarrier);
4182
4183 // If the class is abstract, we eagerly fetch the super class of the
4184 // object to avoid doing a comparison we know will fail.
4185 vixl::aarch64::Label loop;
4186 __ Bind(&loop);
4187 // /* HeapReference<Class> */ temp = temp->super_class_
4188 GenerateReferenceLoadOneRegister(instruction,
4189 temp_loc,
4190 super_offset,
4191 maybe_temp2_loc,
4192 kWithoutReadBarrier);
4193
4194 // If the class reference currently in `temp` is null, jump to the slow path to throw the
4195 // exception.
4196 __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4197 // Otherwise, compare classes.
4198 __ Cmp(temp, cls);
4199 __ B(ne, &loop);
4200 break;
4201 }
4202
4203 case TypeCheckKind::kClassHierarchyCheck: {
4204 // /* HeapReference<Class> */ temp = obj->klass_
4205 GenerateReferenceLoadTwoRegisters(instruction,
4206 temp_loc,
4207 obj_loc,
4208 class_offset,
4209 maybe_temp2_loc,
4210 kWithoutReadBarrier);
4211
4212 // Walk over the class hierarchy to find a match.
4213 vixl::aarch64::Label loop;
4214 __ Bind(&loop);
4215 __ Cmp(temp, cls);
4216 __ B(eq, &done);
4217
4218 // /* HeapReference<Class> */ temp = temp->super_class_
4219 GenerateReferenceLoadOneRegister(instruction,
4220 temp_loc,
4221 super_offset,
4222 maybe_temp2_loc,
4223 kWithoutReadBarrier);
4224
4225 // If the class reference currently in `temp` is not null, jump
4226 // back to the beginning of the loop.
4227 __ Cbnz(temp, &loop);
4228 // Otherwise, jump to the slow path to throw the exception.
4229 __ B(type_check_slow_path->GetEntryLabel());
4230 break;
4231 }
4232
4233 case TypeCheckKind::kArrayObjectCheck: {
4234 // /* HeapReference<Class> */ temp = obj->klass_
4235 GenerateReferenceLoadTwoRegisters(instruction,
4236 temp_loc,
4237 obj_loc,
4238 class_offset,
4239 maybe_temp2_loc,
4240 kWithoutReadBarrier);
4241
4242 // Do an exact check.
4243 __ Cmp(temp, cls);
4244 __ B(eq, &done);
4245
4246 // Otherwise, we need to check that the object's class is a non-primitive array.
4247 // /* HeapReference<Class> */ temp = temp->component_type_
4248 GenerateReferenceLoadOneRegister(instruction,
4249 temp_loc,
4250 component_offset,
4251 maybe_temp2_loc,
4252 kWithoutReadBarrier);
4253
4254 // If the component type is null, jump to the slow path to throw the exception.
4255 __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4256 // Otherwise, the object is indeed an array. Further check that this component type is not a
4257 // primitive type.
4258 __ Ldrh(temp, HeapOperand(temp, primitive_offset));
4259 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4260 __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
4261 break;
4262 }
4263
4264 case TypeCheckKind::kUnresolvedCheck:
4265 // We always go into the type check slow path for the unresolved check cases.
4266 //
4267 // We cannot directly call the CheckCast runtime entry point
4268 // without resorting to a type checking slow path here (i.e. by
4269 // calling InvokeRuntime directly), as it would require
4270 // assigning fixed registers for the inputs of this HCheckCast
4271 // instruction (following the runtime calling convention), which
4272 // might be cluttered by the potential first read barrier
4273 // emission at the beginning of this method.
4274 __ B(type_check_slow_path->GetEntryLabel());
4275 break;
4276 case TypeCheckKind::kInterfaceCheck: {
4277 // /* HeapReference<Class> */ temp = obj->klass_
4278 GenerateReferenceLoadTwoRegisters(instruction,
4279 temp_loc,
4280 obj_loc,
4281 class_offset,
4282 maybe_temp2_loc,
4283 kWithoutReadBarrier);
4284
4285 // /* HeapReference<Class> */ temp = temp->iftable_
4286 GenerateReferenceLoadTwoRegisters(instruction,
4287 temp_loc,
4288 temp_loc,
4289 iftable_offset,
4290 maybe_temp2_loc,
4291 kWithoutReadBarrier);
4292 // Iftable is never null.
4293 __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
4294 // Loop through the iftable and check if any class matches.
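      // The IfTable stores (interface class, method array) pairs, so the length loaded above
      // counts two references per implemented interface; the loop advances by two references
      // and decrements the remaining count by two per iteration. Roughly:
      //   while (true) {
      //     if (remaining == 0) goto slow_path;  // exhausted the iftable without a match
      //     current = data[0];                   // interface class of the current pair
      //     data += 2; remaining -= 2;           // skip over the paired method array
      //     if (current == cls) break;           // match found; fall through to `done`
      //   }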
4295 vixl::aarch64::Label start_loop;
4296 __ Bind(&start_loop);
4297 __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel());
4298 __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset));
4299 GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc));
4300 // Go to next interface.
4301 __ Add(temp, temp, 2 * kHeapReferenceSize);
4302 __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2);
4303 // Compare the classes and continue the loop if they do not match.
4304 __ Cmp(cls, WRegisterFrom(maybe_temp3_loc));
4305 __ B(ne, &start_loop);
4306 break;
4307 }
4308 }
4309 __ Bind(&done);
4310
4311 __ Bind(type_check_slow_path->GetExitLabel());
4312 }
4313
4314 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
4315 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
4316 locations->SetOut(Location::ConstantLocation(constant));
4317 }
4318
4319 void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
4320 // Will be generated at use site.
4321 }
4322
4323 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
4324 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
4325 locations->SetOut(Location::ConstantLocation(constant));
4326 }
4327
4328 void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
4329 // Will be generated at use site.
4330 }
4331
4332 void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4333 // The trampoline uses the same calling convention as dex calling conventions,
4334 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
4335 // the method_idx.
4336 HandleInvoke(invoke);
4337 }
4338
4339 void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4340 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
4341 }
4342
4343 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
4344 InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
4345 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
4346 }
4347
4348 void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4349 HandleInvoke(invoke);
4350 }
4351
4352 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4353 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
4354 LocationSummary* locations = invoke->GetLocations();
4355 Register temp = XRegisterFrom(locations->GetTemp(0));
4356 Location receiver = locations->InAt(0);
4357 Offset class_offset = mirror::Object::ClassOffset();
4358 Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4359
4360 // The register ip1 is required to be used for the hidden argument in
4361 // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
4362 MacroAssembler* masm = GetVIXLAssembler();
4363 UseScratchRegisterScope scratch_scope(masm);
4364 scratch_scope.Exclude(ip1);
4365 __ Mov(ip1, invoke->GetDexMethodIndex());
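  // The method index in ip1 is the hidden argument used by art_quick_imt_conflict_trampoline
  // to resolve the actual target when several interface methods map to the same IMT slot.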
4366
4367 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4368 if (receiver.IsStackSlot()) {
4369 __ Ldr(temp.W(), StackOperandFrom(receiver));
4370 {
4371 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4372 // /* HeapReference<Class> */ temp = temp->klass_
4373 __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
4374 codegen_->MaybeRecordImplicitNullCheck(invoke);
4375 }
4376 } else {
4377 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4378 // /* HeapReference<Class> */ temp = receiver->klass_
4379 __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
4380 codegen_->MaybeRecordImplicitNullCheck(invoke);
4381 }
4382
4383 // Instead of simply (possibly) unpoisoning `temp` here, we should
4384 // emit a read barrier for the previous class reference load.
4385 // However this is not required in practice, as this is an
4386 // intermediate/temporary reference and because the current
4387 // concurrent copying collector keeps the from-space memory
4388 // intact/accessible until the end of the marking phase (the
4389 // concurrent copying collector may not in the future).
4390 GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4391 __ Ldr(temp,
4392 MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
4393 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4394 invoke->GetImtIndex(), kArm64PointerSize));
4395 // temp = temp->GetImtEntryAt(method_offset);
4396 __ Ldr(temp, MemOperand(temp, method_offset));
4397 // lr = temp->GetEntryPoint();
4398 __ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));
4399
4400 {
4401 // Ensure the pc position is recorded immediately after the `blr` instruction.
4402 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4403
4404 // lr();
4405 __ blr(lr);
4406 DCHECK(!codegen_->IsLeafMethod());
4407 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
4408 }
4409 }
4410
4411 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4412 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
4413 if (intrinsic.TryDispatch(invoke)) {
4414 return;
4415 }
4416
4417 HandleInvoke(invoke);
4418 }
4419
4420 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4421 // Explicit clinit checks triggered by static invokes must have been pruned by
4422 // art::PrepareForRegisterAllocation.
4423 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4424
4425 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
4426 if (intrinsic.TryDispatch(invoke)) {
4427 return;
4428 }
4429
4430 HandleInvoke(invoke);
4431 }
4432
4433 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4434 if (invoke->GetLocations()->Intrinsified()) {
4435 IntrinsicCodeGeneratorARM64 intrinsic(codegen);
4436 intrinsic.Dispatch(invoke);
4437 return true;
4438 }
4439 return false;
4440 }
4441
4442 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
4443 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
4444 HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
4445 // On ARM64 we support all dispatch types.
4446 return desired_dispatch_info;
4447 }
4448
4449 Location CodeGeneratorARM64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
4450 Location temp) {
4451 // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
4452 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
4453 switch (invoke->GetMethodLoadKind()) {
4454 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
4455 uint32_t offset =
4456 GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
4457 // temp = thread->string_init_entrypoint
4458 __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
4459 break;
4460 }
4461 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
4462 callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4463 break;
4464 case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
4465 // Load method address from literal pool.
4466 __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
4467 break;
4468 case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
4469 // Add ADRP with its PC-relative DexCache access patch.
4470 const DexFile& dex_file = invoke->GetDexFileForPcRelativeDexCache();
4471 uint32_t element_offset = invoke->GetDexCacheArrayOffset();
4472 vixl::aarch64::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
4473 EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4474 // Add LDR with its PC-relative DexCache access patch.
4475 vixl::aarch64::Label* ldr_label =
4476 NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
4477 EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
4478 break;
4479 }
4480 case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
4481 Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4482 Register reg = XRegisterFrom(temp);
4483 Register method_reg;
4484 if (current_method.IsRegister()) {
4485 method_reg = XRegisterFrom(current_method);
4486 } else {
4487 DCHECK(invoke->GetLocations()->Intrinsified());
4488 DCHECK(!current_method.IsValid());
4489 method_reg = reg;
4490 __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset));
4491 }
4492
4493 // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
4494 __ Ldr(reg.X(),
4495 MemOperand(method_reg.X(),
4496 ArtMethod::DexCacheResolvedMethodsOffset(kArm64PointerSize).Int32Value()));
4497 // temp = temp[index_in_cache];
4498 // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
4499 uint32_t index_in_cache = invoke->GetDexMethodIndex();
4500 __ Ldr(reg.X(), MemOperand(reg.X(), GetCachePointerOffset(index_in_cache)));
4501 break;
4502 }
4503 }
4504 return callee_method;
4505 }
4506
4507 void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
4508 // All registers are assumed to be correctly set up.
4509 Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
4510
4511 switch (invoke->GetCodePtrLocation()) {
4512 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
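      // The callee is the method being compiled (recursive call), so branch straight to its
      // own frame entry instead of loading an entry point.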
4513 __ Bl(&frame_entry_label_);
4514 break;
4515 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
4516 // LR = callee_method->entry_point_from_quick_compiled_code_;
4517 __ Ldr(lr, MemOperand(
4518 XRegisterFrom(callee_method),
4519 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
4520 {
4521 // To ensure that the pc position is recorded immediately after the `blr` instruction
4522 // BLR must be the last instruction emitted in this function.
4523 // Recording the pc will occur right after returning from this function.
4524 ExactAssemblyScope eas(GetVIXLAssembler(),
4525 kInstructionSize,
4526 CodeBufferCheckScope::kExactSize);
4527 // lr()
4528 __ blr(lr);
4529 }
4530 break;
4531 }
4532
4533 DCHECK(!IsLeafMethod());
4534 }
4535
4536 void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
4537 // Use the calling convention instead of the location of the receiver, as
4538 // intrinsics may have put the receiver in a different register. In the intrinsics
4539 // slow path, the arguments have been moved to the right place, so here we are
4540 // guaranteed that the receiver is the first register of the calling convention.
4541 InvokeDexCallingConvention calling_convention;
4542 Register receiver = calling_convention.GetRegisterAt(0);
4543 Register temp = XRegisterFrom(temp_in);
4544 size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4545 invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
4546 Offset class_offset = mirror::Object::ClassOffset();
4547 Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4548
4549 DCHECK(receiver.IsRegister());
4550
4551 {
4552 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4553 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4554 // /* HeapReference<Class> */ temp = receiver->klass_
4555 __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
4556 MaybeRecordImplicitNullCheck(invoke);
4557 }
4558 // Instead of simply (possibly) unpoisoning `temp` here, we should
4559 // emit a read barrier for the previous class reference load.
4560 // However this is not required in practice, as this is an intermediate/temporary reference and because the current
4561 // concurrent copying collector keeps the from-space memory
4562 // intact/accessible until the end of the marking phase (the
4563 // concurrent copying collector may not in the future).
4564 GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4565 // temp = temp->GetMethodAt(method_offset);
4566 __ Ldr(temp, MemOperand(temp, method_offset));
4567 // lr = temp->GetEntryPoint();
4568 __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
4569 {
4570 // To ensure that the pc position is recorded immediately after the `blr` instruction
4571 // BLR should be the last instruction emitted in this function.
4572 // Recording the pc will occur right after returning from this function.
4573 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4574 // lr();
4575 __ blr(lr);
4576 }
4577 }
4578
4579 void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4580 HandleInvoke(invoke);
4581 }
4582
4583 void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4584 codegen_->GenerateInvokePolymorphicCall(invoke);
4585 }
4586
4587 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
4588 const DexFile& dex_file,
4589 dex::StringIndex string_index,
4590 vixl::aarch64::Label* adrp_label) {
4591 return
4592 NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_);
4593 }
4594
4595 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(
4596 const DexFile& dex_file,
4597 dex::TypeIndex type_index,
4598 vixl::aarch64::Label* adrp_label) {
4599 return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &pc_relative_type_patches_);
4600 }
4601
4602 vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
4603 const DexFile& dex_file,
4604 dex::TypeIndex type_index,
4605 vixl::aarch64::Label* adrp_label) {
4606 return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_);
4607 }
4608
4609 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(
4610 const DexFile& dex_file,
4611 uint32_t element_offset,
4612 vixl::aarch64::Label* adrp_label) {
4613 return NewPcRelativePatch(dex_file, element_offset, adrp_label, &pc_relative_dex_cache_patches_);
4614 }
4615
4616 vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) {
4617 baker_read_barrier_patches_.emplace_back(custom_data);
4618 return &baker_read_barrier_patches_.back().label;
4619 }
4620
4621 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
4622 const DexFile& dex_file,
4623 uint32_t offset_or_index,
4624 vixl::aarch64::Label* adrp_label,
4625 ArenaDeque<PcRelativePatchInfo>* patches) {
4626 // Add a patch entry and return the label.
4627 patches->emplace_back(dex_file, offset_or_index);
4628 PcRelativePatchInfo* info = &patches->back();
4629 vixl::aarch64::Label* label = &info->label;
4630 // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
4631 info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
4632 return label;
4633 }
4634
4635 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageStringLiteral(
4636 const DexFile& dex_file, dex::StringIndex string_index) {
4637 return boot_image_string_patches_.GetOrCreate(
4638 StringReference(&dex_file, string_index),
4639 [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
4640 }
4641
4642 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageTypeLiteral(
4643 const DexFile& dex_file, dex::TypeIndex type_index) {
4644 return boot_image_type_patches_.GetOrCreate(
4645 TypeReference(&dex_file, type_index),
4646 [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
4647 }
4648
4649 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
4650 uint64_t address) {
4651 return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
4652 }
4653
4654 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
4655 const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) {
4656 jit_string_roots_.Overwrite(StringReference(&dex_file, string_index),
4657 reinterpret_cast64<uint64_t>(handle.GetReference()));
4658 return jit_string_patches_.GetOrCreate(
4659 StringReference(&dex_file, string_index),
4660 [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
4661 }
4662
4663 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral(
4664 const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) {
4665 jit_class_roots_.Overwrite(TypeReference(&dex_file, type_index),
4666 reinterpret_cast64<uint64_t>(handle.GetReference()));
4667 return jit_class_patches_.GetOrCreate(
4668 TypeReference(&dex_file, type_index),
4669 [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
4670 }
4671
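// The placeholder emitters below implement the usual AArch64 two-instruction PC-relative
// addressing pattern: ADRP materializes the 4 KiB page of the target address and the paired
// ADD or LDR supplies the low 12 bits. Both are emitted with zero immediates here and fixed
// up later by the linker through the recorded patch labels.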
4672 void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
4673 vixl::aarch64::Register reg) {
4674 DCHECK(reg.IsX());
4675 SingleEmissionCheckScope guard(GetVIXLAssembler());
4676 __ Bind(fixup_label);
4677 __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0));
4678 }
4679
4680 void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
4681 vixl::aarch64::Register out,
4682 vixl::aarch64::Register base) {
4683 DCHECK(out.IsX());
4684 DCHECK(base.IsX());
4685 SingleEmissionCheckScope guard(GetVIXLAssembler());
4686 __ Bind(fixup_label);
4687 __ add(out, base, Operand(/* offset placeholder */ 0));
4688 }
4689
4690 void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
4691 vixl::aarch64::Register out,
4692 vixl::aarch64::Register base) {
4693 DCHECK(base.IsX());
4694 SingleEmissionCheckScope guard(GetVIXLAssembler());
4695 __ Bind(fixup_label);
4696 __ ldr(out, MemOperand(base, /* offset placeholder */ 0));
4697 }
4698
4699 template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
4700 inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
4701 const ArenaDeque<PcRelativePatchInfo>& infos,
4702 ArenaVector<LinkerPatch>* linker_patches) {
4703 for (const PcRelativePatchInfo& info : infos) {
4704 linker_patches->push_back(Factory(info.label.GetLocation(),
4705 &info.target_dex_file,
4706 info.pc_insn_label->GetLocation(),
4707 info.offset_or_index));
4708 }
4709 }
4710
4711 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
4712 DCHECK(linker_patches->empty());
4713 size_t size =
4714 pc_relative_dex_cache_patches_.size() +
4715 boot_image_string_patches_.size() +
4716 pc_relative_string_patches_.size() +
4717 boot_image_type_patches_.size() +
4718 pc_relative_type_patches_.size() +
4719 type_bss_entry_patches_.size() +
4720 baker_read_barrier_patches_.size();
4721 linker_patches->reserve(size);
4722 for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
4723 linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(),
4724 &info.target_dex_file,
4725 info.pc_insn_label->GetLocation(),
4726 info.offset_or_index));
4727 }
4728 for (const auto& entry : boot_image_string_patches_) {
4729 const StringReference& target_string = entry.first;
4730 vixl::aarch64::Literal<uint32_t>* literal = entry.second;
4731 linker_patches->push_back(LinkerPatch::StringPatch(literal->GetOffset(),
4732 target_string.dex_file,
4733 target_string.string_index.index_));
4734 }
4735 if (!GetCompilerOptions().IsBootImage()) {
4736 DCHECK(pc_relative_type_patches_.empty());
4737 EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
4738 linker_patches);
4739 } else {
4740 EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
4741 linker_patches);
4742 EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
4743 linker_patches);
4744 }
4745 EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
4746 linker_patches);
4747 for (const auto& entry : boot_image_type_patches_) {
4748 const TypeReference& target_type = entry.first;
4749 vixl::aarch64::Literal<uint32_t>* literal = entry.second;
4750 linker_patches->push_back(LinkerPatch::TypePatch(literal->GetOffset(),
4751 target_type.dex_file,
4752 target_type.type_index.index_));
4753 }
4754 for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
4755 linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
4756 info.custom_data));
4757 }
4758 DCHECK_EQ(size, linker_patches->size());
4759 }
4760
4761 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value,
4762 Uint32ToLiteralMap* map) {
4763 return map->GetOrCreate(
4764 value,
4765 [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
4766 }
4767
4768 vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
4769 return uint64_literals_.GetOrCreate(
4770 value,
4771 [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
4772 }
4773
4774 vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral(
4775 MethodReference target_method,
4776 MethodToLiteralMap* map) {
4777 return map->GetOrCreate(
4778 target_method,
4779 [this]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(/* placeholder */ 0u); });
4780 }
4781
4782 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4783 // Explicit clinit checks triggered by static invokes must have been pruned by
4784 // art::PrepareForRegisterAllocation.
4785 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4786
4787 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4788 return;
4789 }
4790
4791 // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
4792 // are no pools emitted.
4793 EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
4794 LocationSummary* locations = invoke->GetLocations();
4795 codegen_->GenerateStaticOrDirectCall(
4796 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
4797 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
4798 }
4799
4800 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4801 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4802 return;
4803 }
4804
4805 // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
4806 // are no pools emitted.
4807 EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
4808 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
4809 DCHECK(!codegen_->IsLeafMethod());
4810 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
4811 }
4812
4813 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
4814 HLoadClass::LoadKind desired_class_load_kind) {
4815 switch (desired_class_load_kind) {
4816 case HLoadClass::LoadKind::kInvalid:
4817 LOG(FATAL) << "UNREACHABLE";
4818 UNREACHABLE();
4819 case HLoadClass::LoadKind::kReferrersClass:
4820 break;
4821 case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
4822 DCHECK(!GetCompilerOptions().GetCompilePic());
4823 break;
4824 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
4825 DCHECK(GetCompilerOptions().GetCompilePic());
4826 break;
4827 case HLoadClass::LoadKind::kBootImageAddress:
4828 break;
4829 case HLoadClass::LoadKind::kBssEntry:
4830 DCHECK(!Runtime::Current()->UseJitCompilation());
4831 break;
4832 case HLoadClass::LoadKind::kJitTableAddress:
4833 DCHECK(Runtime::Current()->UseJitCompilation());
4834 break;
4835 case HLoadClass::LoadKind::kDexCacheViaMethod:
4836 break;
4837 }
4838 return desired_class_load_kind;
4839 }
4840
4841 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
4842 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
4843 if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
4844 InvokeRuntimeCallingConvention calling_convention;
4845 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
4846 cls,
4847 LocationFrom(calling_convention.GetRegisterAt(0)),
4848 LocationFrom(vixl::aarch64::x0));
4849 DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0));
4850 return;
4851 }
4852 DCHECK(!cls->NeedsAccessCheck());
4853
4854 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
4855 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
4856 ? LocationSummary::kCallOnSlowPath
4857 : LocationSummary::kNoCall;
4858 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
4859 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
4860 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
4861 }
4862
4863 if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
4864 locations->SetInAt(0, Location::RequiresRegister());
4865 }
4866 locations->SetOut(Location::RequiresRegister());
4867 if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
4868 if (!kUseReadBarrier || kUseBakerReadBarrier) {
4869 // Rely on the type resolution or initialization and marking to save everything we need.
4870 locations->AddTemp(FixedTempLocation());
4871 RegisterSet caller_saves = RegisterSet::Empty();
4872 InvokeRuntimeCallingConvention calling_convention;
4873 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
4874 DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
4875 RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot),
4876 Primitive::kPrimNot).GetCode());
4877 locations->SetCustomSlowPathCallerSaves(caller_saves);
4878 } else {
4879 // For non-Baker read barrier we have a temp-clobbering call.
4880 }
4881 }
4882 }
4883
4884 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
4885 // move.
4886 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
4887 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
4888 if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
4889 codegen_->GenerateLoadClassRuntimeCall(cls);
4890 return;
4891 }
4892 DCHECK(!cls->NeedsAccessCheck());
4893
4894 Location out_loc = cls->GetLocations()->Out();
4895 Register out = OutputRegister(cls);
4896 Register bss_entry_temp;
4897 vixl::aarch64::Label* bss_entry_adrp_label = nullptr;
4898
4899 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
4900 ? kWithoutReadBarrier
4901 : kCompilerReadBarrierOption;
4902 bool generate_null_check = false;
4903 switch (load_kind) {
4904 case HLoadClass::LoadKind::kReferrersClass: {
4905 DCHECK(!cls->CanCallRuntime());
4906 DCHECK(!cls->MustGenerateClinitCheck());
4907 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
4908 Register current_method = InputRegisterAt(cls, 0);
4909 GenerateGcRootFieldLoad(cls,
4910 out_loc,
4911 current_method,
4912 ArtMethod::DeclaringClassOffset().Int32Value(),
4913 /* fixup_label */ nullptr,
4914 read_barrier_option);
4915 break;
4916 }
4917 case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
4918 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
4919 __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
4920 cls->GetTypeIndex()));
4921 break;
4922 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
4923 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
4924 // Add ADRP with its PC-relative type patch.
4925 const DexFile& dex_file = cls->GetDexFile();
4926 dex::TypeIndex type_index = cls->GetTypeIndex();
4927 vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index);
4928 codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
4929 // Add ADD with its PC-relative type patch.
4930 vixl::aarch64::Label* add_label =
4931 codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label);
4932 codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
4933 break;
4934 }
4935 case HLoadClass::LoadKind::kBootImageAddress: {
4936 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
4937 uint32_t address = dchecked_integral_cast<uint32_t>(
4938 reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
4939 DCHECK_NE(address, 0u);
4940 __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
4941 break;
4942 }
4943 case HLoadClass::LoadKind::kBssEntry: {
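      // The class .bss slot is initially null and is written by the resolution slow path,
      // which is why `generate_null_check` is set for this load kind.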
4944 // Add ADRP with its PC-relative Class .bss entry patch.
4945 const DexFile& dex_file = cls->GetDexFile();
4946 dex::TypeIndex type_index = cls->GetTypeIndex();
4947 bss_entry_temp = XRegisterFrom(cls->GetLocations()->GetTemp(0));
4948 bss_entry_adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
4949 codegen_->EmitAdrpPlaceholder(bss_entry_adrp_label, bss_entry_temp);
4950 // Add LDR with its PC-relative Class patch.
4951 vixl::aarch64::Label* ldr_label =
4952 codegen_->NewBssEntryTypePatch(dex_file, type_index, bss_entry_adrp_label);
4953 // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */
4954 GenerateGcRootFieldLoad(cls,
4955 out_loc,
4956 bss_entry_temp,
4957 /* offset placeholder */ 0u,
4958 ldr_label,
4959 read_barrier_option);
4960 generate_null_check = true;
4961 break;
4962 }
4963 case HLoadClass::LoadKind::kJitTableAddress: {
4964 __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
4965 cls->GetTypeIndex(),
4966 cls->GetClass()));
4967 GenerateGcRootFieldLoad(cls,
4968 out_loc,
4969 out.X(),
4970 /* offset */ 0,
4971 /* fixup_label */ nullptr,
4972 read_barrier_option);
4973 break;
4974 }
4975 case HLoadClass::LoadKind::kDexCacheViaMethod:
4976 case HLoadClass::LoadKind::kInvalid:
4977 LOG(FATAL) << "UNREACHABLE";
4978 UNREACHABLE();
4979 }
4980
4981 bool do_clinit = cls->MustGenerateClinitCheck();
4982 if (generate_null_check || do_clinit) {
4983 DCHECK(cls->CanCallRuntime());
4984 SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
4985 cls, cls, cls->GetDexPc(), do_clinit, bss_entry_temp, bss_entry_adrp_label);
4986 codegen_->AddSlowPath(slow_path);
4987 if (generate_null_check) {
4988 __ Cbz(out, slow_path->GetEntryLabel());
4989 }
4990 if (cls->MustGenerateClinitCheck()) {
4991 GenerateClassInitializationCheck(slow_path, out);
4992 } else {
4993 __ Bind(slow_path->GetExitLabel());
4994 }
4995 }
4996 }
4997
4998 static MemOperand GetExceptionTlsAddress() {
4999 return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
5000 }
5001
5002 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
5003 LocationSummary* locations =
5004 new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
5005 locations->SetOut(Location::RequiresRegister());
5006 }
5007
5008 void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) {
5009 __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress());
5010 }
5011
5012 void LocationsBuilderARM64::VisitClearException(HClearException* clear) {
5013 new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
5014 }
5015
5016 void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
5017 __ Str(wzr, GetExceptionTlsAddress());
5018 }
5019
5020 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
5021 HLoadString::LoadKind desired_string_load_kind) {
5022 switch (desired_string_load_kind) {
5023 case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5024 DCHECK(!GetCompilerOptions().GetCompilePic());
5025 break;
5026 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5027 DCHECK(GetCompilerOptions().GetCompilePic());
5028 break;
5029 case HLoadString::LoadKind::kBootImageAddress:
5030 break;
5031 case HLoadString::LoadKind::kBssEntry:
5032 DCHECK(!Runtime::Current()->UseJitCompilation());
5033 break;
5034 case HLoadString::LoadKind::kJitTableAddress:
5035 DCHECK(Runtime::Current()->UseJitCompilation());
5036 break;
5037 case HLoadString::LoadKind::kDexCacheViaMethod:
5038 break;
5039 }
5040 return desired_string_load_kind;
5041 }
5042
5043 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
5044 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
5045 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
5046 if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
5047 InvokeRuntimeCallingConvention calling_convention;
5048 locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
5049 } else {
5050 locations->SetOut(Location::RequiresRegister());
5051 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
5052 if (!kUseReadBarrier || kUseBakerReadBarrier) {
5053 // Rely on the pResolveString and marking to save everything we need.
5054 locations->AddTemp(FixedTempLocation());
5055 RegisterSet caller_saves = RegisterSet::Empty();
5056 InvokeRuntimeCallingConvention calling_convention;
5057 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
5058 DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
5059 RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot),
5060 Primitive::kPrimNot).GetCode());
5061 locations->SetCustomSlowPathCallerSaves(caller_saves);
5062 } else {
5063 // For non-Baker read barrier we have a temp-clobbering call.
5064 }
5065 }
5066 }
5067 }
5068
5069 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5070 // move.
5071 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
5072 Register out = OutputRegister(load);
5073 Location out_loc = load->GetLocations()->Out();
5074
5075 switch (load->GetLoadKind()) {
5076 case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5077 __ Ldr(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
5078 load->GetStringIndex()));
5079 return; // No dex cache slow path.
5080 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5081 // Add ADRP with its PC-relative String patch.
5082 const DexFile& dex_file = load->GetDexFile();
5083 const dex::StringIndex string_index = load->GetStringIndex();
5084 DCHECK(codegen_->GetCompilerOptions().IsBootImage());
5085 vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
5086 codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5087 // Add ADD with its PC-relative String patch.
5088 vixl::aarch64::Label* add_label =
5089 codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
5090 codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
5091 return; // No dex cache slow path.
5092 }
5093 case HLoadString::LoadKind::kBootImageAddress: {
5094 uint32_t address = dchecked_integral_cast<uint32_t>(
5095 reinterpret_cast<uintptr_t>(load->GetString().Get()));
5096 DCHECK_NE(address, 0u);
5097 __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
5098 return; // No dex cache slow path.
5099 }
5100 case HLoadString::LoadKind::kBssEntry: {
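      // As with class .bss entries, the string .bss slot is null until pResolveString fills
      // it in, so the Cbz below routes the first use through LoadStringSlowPathARM64.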
5101 // Add ADRP with its PC-relative String .bss entry patch.
5102 const DexFile& dex_file = load->GetDexFile();
5103 const dex::StringIndex string_index = load->GetStringIndex();
5104 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5105 Register temp = XRegisterFrom(load->GetLocations()->GetTemp(0));
5106 vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
5107 codegen_->EmitAdrpPlaceholder(adrp_label, temp);
5108 // Add LDR with its PC-relative String patch.
5109 vixl::aarch64::Label* ldr_label =
5110 codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
5111 // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */
5112 GenerateGcRootFieldLoad(load,
5113 out_loc,
5114 temp,
5115 /* offset placeholder */ 0u,
5116 ldr_label,
5117 kCompilerReadBarrierOption);
5118 SlowPathCodeARM64* slow_path =
5119 new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load, temp, adrp_label);
5120 codegen_->AddSlowPath(slow_path);
5121 __ Cbz(out.X(), slow_path->GetEntryLabel());
5122 __ Bind(slow_path->GetExitLabel());
5123 return;
5124 }
5125 case HLoadString::LoadKind::kJitTableAddress: {
5126 __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
5127 load->GetStringIndex(),
5128 load->GetString()));
5129 GenerateGcRootFieldLoad(load,
5130 out_loc,
5131 out.X(),
5132 /* offset */ 0,
5133 /* fixup_label */ nullptr,
5134 kCompilerReadBarrierOption);
5135 return;
5136 }
5137 default:
5138 break;
5139 }
5140
5141 // TODO: Re-add the compiler code to do string dex cache lookup again.
5142 InvokeRuntimeCallingConvention calling_convention;
5143 DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
5144 __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
5145 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
5146 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
5147 }
5148
5149 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
5150 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
5151 locations->SetOut(Location::ConstantLocation(constant));
5152 }
5153
5154 void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
5155 // Will be generated at use site.
5156 }
5157
5158 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5159 LocationSummary* locations =
5160 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
5161 InvokeRuntimeCallingConvention calling_convention;
5162 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5163 }
5164
5165 void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5166 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
5167 instruction,
5168 instruction->GetDexPc());
5169 if (instruction->IsEnter()) {
5170 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
5171 } else {
5172 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
5173 }
5174 }
5175
5176 void LocationsBuilderARM64::VisitMul(HMul* mul) {
5177 LocationSummary* locations =
5178 new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
5179 switch (mul->GetResultType()) {
5180 case Primitive::kPrimInt:
5181 case Primitive::kPrimLong:
5182 locations->SetInAt(0, Location::RequiresRegister());
5183 locations->SetInAt(1, Location::RequiresRegister());
5184 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5185 break;
5186
5187 case Primitive::kPrimFloat:
5188 case Primitive::kPrimDouble:
5189 locations->SetInAt(0, Location::RequiresFpuRegister());
5190 locations->SetInAt(1, Location::RequiresFpuRegister());
5191 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5192 break;
5193
5194 default:
5195 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5196 }
5197 }
5198
5199 void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) {
5200 switch (mul->GetResultType()) {
5201 case Primitive::kPrimInt:
5202 case Primitive::kPrimLong:
5203 __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
5204 break;
5205
5206 case Primitive::kPrimFloat:
5207 case Primitive::kPrimDouble:
5208 __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1));
5209 break;
5210
5211 default:
5212 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5213 }
5214 }
5215
5216 void LocationsBuilderARM64::VisitNeg(HNeg* neg) {
5217 LocationSummary* locations =
5218 new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
5219 switch (neg->GetResultType()) {
5220 case Primitive::kPrimInt:
5221 case Primitive::kPrimLong:
5222 locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg));
5223 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5224 break;
5225
5226 case Primitive::kPrimFloat:
5227 case Primitive::kPrimDouble:
5228 locations->SetInAt(0, Location::RequiresFpuRegister());
5229 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5230 break;
5231
5232 default:
5233 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5234 }
5235 }
5236
5237 void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) {
5238 switch (neg->GetResultType()) {
5239 case Primitive::kPrimInt:
5240 case Primitive::kPrimLong:
5241 __ Neg(OutputRegister(neg), InputOperandAt(neg, 0));
5242 break;
5243
5244 case Primitive::kPrimFloat:
5245 case Primitive::kPrimDouble:
5246 __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0));
5247 break;
5248
5249 default:
5250 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5251 }
5252 }
5253
5254 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
5255 LocationSummary* locations =
5256 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
5257 InvokeRuntimeCallingConvention calling_convention;
5258 locations->SetOut(LocationFrom(x0));
5259 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5260 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5261 }
5262
5263 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
5264   // Note: if heap poisoning is enabled, the entry point takes care
5265   // of poisoning the reference.
5266 QuickEntrypointEnum entrypoint =
5267 CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
5268 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5269 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5270 }
5271
5272 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
5273 LocationSummary* locations =
5274 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
5275 InvokeRuntimeCallingConvention calling_convention;
5276 if (instruction->IsStringAlloc()) {
5277 locations->AddTemp(LocationFrom(kArtMethodRegister));
5278 } else {
5279 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5280 }
5281 locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
5282 }
5283
5284 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
5285   // Note: if heap poisoning is enabled, the entry point takes care
5286   // of poisoning the reference.
5287 if (instruction->IsStringAlloc()) {
5288 // String is allocated through StringFactory. Call NewEmptyString entry point.
5289 Location temp = instruction->GetLocations()->GetTemp(0);
5290 MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
5291 __ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString)));
5292 __ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value()));
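    // Sketch of the sequence above (symbolic offsets, for readability only):
    //   ldr xMethod, [tr, #pNewEmptyString_offset]                 // StringFactory method.
    //   ldr lr, [xMethod, #entry_point_from_quick_compiled_code]   // Its quick code entry.
    // The call itself is made with `blr lr` below so that RecordPcInfo can be placed
    // immediately after the branch instruction.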
5293
5294 {
5295 // Ensure the pc position is recorded immediately after the `blr` instruction.
5296 ExactAssemblyScope eas(GetVIXLAssembler(),
5297 kInstructionSize,
5298 CodeBufferCheckScope::kExactSize);
5299 __ blr(lr);
5300 codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
5301 }
5302 } else {
5303 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5304 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5305 }
5306 }
5307
5308 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
5309 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5310 locations->SetInAt(0, Location::RequiresRegister());
5311 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5312 }
5313
5314 void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
5315 switch (instruction->GetResultType()) {
5316 case Primitive::kPrimInt:
5317 case Primitive::kPrimLong:
5318 __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0));
5319 break;
5320
5321 default:
5322 LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
5323 }
5324 }
5325
5326 void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
5327 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5328 locations->SetInAt(0, Location::RequiresRegister());
5329 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5330 }
5331
5332 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
5333 __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
5334 }
5335
5336 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
5337 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5338 locations->SetInAt(0, Location::RequiresRegister());
5339 }
5340
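// Note on implicit null checks: the code below emits a load of wzr from [obj, #0]. If `obj`
// is null that access faults, and the PC recorded via RecordPcInfo lets the runtime's fault
// handler map the fault back to this instruction and throw a NullPointerException at the
// corresponding dex PC instead of crashing.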
5341 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5342 if (CanMoveNullCheckToUser(instruction)) {
5343 return;
5344 }
5345 {
5346 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
5347 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5348 Location obj = instruction->GetLocations()->InAt(0);
5349 __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
5350 RecordPcInfo(instruction, instruction->GetDexPc());
5351 }
5352 }
5353
5354 void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5355 SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM64(instruction);
5356 AddSlowPath(slow_path);
5357
5358 LocationSummary* locations = instruction->GetLocations();
5359 Location obj = locations->InAt(0);
5360
5361 __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
5362 }
5363
5364 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
5365 codegen_->GenerateNullCheck(instruction);
5366 }
5367
5368 void LocationsBuilderARM64::VisitOr(HOr* instruction) {
5369 HandleBinaryOp(instruction);
5370 }
5371
5372 void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) {
5373 HandleBinaryOp(instruction);
5374 }
5375
5376 void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5377 LOG(FATAL) << "Unreachable";
5378 }
5379
5380 void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) {
5381 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5382 }
5383
5384 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
5385 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5386 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5387 if (location.IsStackSlot()) {
5388 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5389 } else if (location.IsDoubleStackSlot()) {
5390 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5391 }
5392 locations->SetOut(location);
5393 }
5394
5395 void InstructionCodeGeneratorARM64::VisitParameterValue(
5396 HParameterValue* instruction ATTRIBUTE_UNUSED) {
5397 // Nothing to do, the parameter is already at its location.
5398 }
5399
5400 void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) {
5401 LocationSummary* locations =
5402 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
5403 locations->SetOut(LocationFrom(kArtMethodRegister));
5404 }
5405
5406 void InstructionCodeGeneratorARM64::VisitCurrentMethod(
5407 HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
5408 // Nothing to do, the method is already at its location.
5409 }
5410
5411 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
5412 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5413 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5414 locations->SetInAt(i, Location::Any());
5415 }
5416 locations->SetOut(Location::Any());
5417 }
5418
5419 void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
5420 LOG(FATAL) << "Unreachable";
5421 }
5422
5423 void LocationsBuilderARM64::VisitRem(HRem* rem) {
5424 Primitive::Type type = rem->GetResultType();
5425 LocationSummary::CallKind call_kind =
5426 Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
5427 : LocationSummary::kNoCall;
5428 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
5429
5430 switch (type) {
5431 case Primitive::kPrimInt:
5432 case Primitive::kPrimLong:
5433 locations->SetInAt(0, Location::RequiresRegister());
5434 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
5435 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5436 break;
5437
5438 case Primitive::kPrimFloat:
5439 case Primitive::kPrimDouble: {
5440 InvokeRuntimeCallingConvention calling_convention;
5441 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
5442 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
5443 locations->SetOut(calling_convention.GetReturnLocation(type));
5444
5445 break;
5446 }
5447
5448 default:
5449 LOG(FATAL) << "Unexpected rem type " << type;
5450 }
5451 }
5452
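// Note: AArch64 has no floating-point remainder instruction, so the float/double cases below
// are implemented via calls to the fmodf/fmod runtime entry points.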
5453 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
5454 Primitive::Type type = rem->GetResultType();
5455
5456 switch (type) {
5457 case Primitive::kPrimInt:
5458 case Primitive::kPrimLong: {
5459 GenerateDivRemIntegral(rem);
5460 break;
5461 }
5462
5463 case Primitive::kPrimFloat:
5464 case Primitive::kPrimDouble: {
5465 QuickEntrypointEnum entrypoint = (type == Primitive::kPrimFloat) ? kQuickFmodf : kQuickFmod;
5466 codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc());
5467 if (type == Primitive::kPrimFloat) {
5468 CheckEntrypointTypes<kQuickFmodf, float, float, float>();
5469 } else {
5470 CheckEntrypointTypes<kQuickFmod, double, double, double>();
5471 }
5472 break;
5473 }
5474
5475 default:
5476 LOG(FATAL) << "Unexpected rem type " << type;
5477 UNREACHABLE();
5478 }
5479 }
5480
5481 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
5482 memory_barrier->SetLocations(nullptr);
5483 }
5484
5485 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
5486 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
5487 }
5488
5489 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
5490 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5491 Primitive::Type return_type = instruction->InputAt(0)->GetType();
5492 locations->SetInAt(0, ARM64ReturnLocation(return_type));
5493 }
5494
5495 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction ATTRIBUTE_UNUSED) {
5496 codegen_->GenerateFrameExit();
5497 }
5498
5499 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
5500 instruction->SetLocations(nullptr);
5501 }
5502
5503 void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) {
5504 codegen_->GenerateFrameExit();
5505 }
5506
5507 void LocationsBuilderARM64::VisitRor(HRor* ror) {
5508 HandleBinaryOp(ror);
5509 }
5510
5511 void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
5512 HandleBinaryOp(ror);
5513 }
5514
5515 void LocationsBuilderARM64::VisitShl(HShl* shl) {
5516 HandleShift(shl);
5517 }
5518
5519 void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) {
5520 HandleShift(shl);
5521 }
5522
5523 void LocationsBuilderARM64::VisitShr(HShr* shr) {
5524 HandleShift(shr);
5525 }
5526
5527 void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) {
5528 HandleShift(shr);
5529 }
5530
5531 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
5532 HandleBinaryOp(instruction);
5533 }
5534
5535 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
5536 HandleBinaryOp(instruction);
5537 }
5538
5539 void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5540 HandleFieldGet(instruction, instruction->GetFieldInfo());
5541 }
5542
5543 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5544 HandleFieldGet(instruction, instruction->GetFieldInfo());
5545 }
5546
5547 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5548 HandleFieldSet(instruction);
5549 }
5550
5551 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5552 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5553 }
5554
5555 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet(
5556 HUnresolvedInstanceFieldGet* instruction) {
5557 FieldAccessCallingConventionARM64 calling_convention;
5558 codegen_->CreateUnresolvedFieldLocationSummary(
5559 instruction, instruction->GetFieldType(), calling_convention);
5560 }
5561
5562 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet(
5563 HUnresolvedInstanceFieldGet* instruction) {
5564 FieldAccessCallingConventionARM64 calling_convention;
5565 codegen_->GenerateUnresolvedFieldAccess(instruction,
5566 instruction->GetFieldType(),
5567 instruction->GetFieldIndex(),
5568 instruction->GetDexPc(),
5569 calling_convention);
5570 }
5571
5572 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet(
5573 HUnresolvedInstanceFieldSet* instruction) {
5574 FieldAccessCallingConventionARM64 calling_convention;
5575 codegen_->CreateUnresolvedFieldLocationSummary(
5576 instruction, instruction->GetFieldType(), calling_convention);
5577 }
5578
5579 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet(
5580 HUnresolvedInstanceFieldSet* instruction) {
5581 FieldAccessCallingConventionARM64 calling_convention;
5582 codegen_->GenerateUnresolvedFieldAccess(instruction,
5583 instruction->GetFieldType(),
5584 instruction->GetFieldIndex(),
5585 instruction->GetDexPc(),
5586 calling_convention);
5587 }
5588
5589 void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet(
5590 HUnresolvedStaticFieldGet* instruction) {
5591 FieldAccessCallingConventionARM64 calling_convention;
5592 codegen_->CreateUnresolvedFieldLocationSummary(
5593 instruction, instruction->GetFieldType(), calling_convention);
5594 }
5595
5596 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet(
5597 HUnresolvedStaticFieldGet* instruction) {
5598 FieldAccessCallingConventionARM64 calling_convention;
5599 codegen_->GenerateUnresolvedFieldAccess(instruction,
5600 instruction->GetFieldType(),
5601 instruction->GetFieldIndex(),
5602 instruction->GetDexPc(),
5603 calling_convention);
5604 }
5605
5606 void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet(
5607 HUnresolvedStaticFieldSet* instruction) {
5608 FieldAccessCallingConventionARM64 calling_convention;
5609 codegen_->CreateUnresolvedFieldLocationSummary(
5610 instruction, instruction->GetFieldType(), calling_convention);
5611 }
5612
5613 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
5614 HUnresolvedStaticFieldSet* instruction) {
5615 FieldAccessCallingConventionARM64 calling_convention;
5616 codegen_->GenerateUnresolvedFieldAccess(instruction,
5617 instruction->GetFieldType(),
5618 instruction->GetFieldIndex(),
5619 instruction->GetDexPc(),
5620 calling_convention);
5621 }
5622
5623 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
5624 LocationSummary* locations =
5625 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
5626   // In the suspend check slow path, there are usually no caller-save registers at all.
5627 // If SIMD instructions are present, however, we force spilling all live SIMD
5628 // registers in full width (since the runtime only saves/restores lower part).
5629 locations->SetCustomSlowPathCallerSaves(
5630 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5631 }
5632
5633 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
5634 HBasicBlock* block = instruction->GetBlock();
5635 if (block->GetLoopInformation() != nullptr) {
5636 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5637 // The back edge will generate the suspend check.
5638 return;
5639 }
5640 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5641 // The goto will generate the suspend check.
5642 return;
5643 }
5644 GenerateSuspendCheck(instruction, nullptr);
5645 }
5646
5647 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
5648 LocationSummary* locations =
5649 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
5650 InvokeRuntimeCallingConvention calling_convention;
5651 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5652 }
5653
5654 void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
5655 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
5656 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
5657 }
5658
5659 void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) {
5660 LocationSummary* locations =
5661 new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
5662 Primitive::Type input_type = conversion->GetInputType();
5663 Primitive::Type result_type = conversion->GetResultType();
5664 DCHECK_NE(input_type, result_type);
5665 if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) ||
5666 (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) {
5667 LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
5668 }
5669
5670 if (Primitive::IsFloatingPointType(input_type)) {
5671 locations->SetInAt(0, Location::RequiresFpuRegister());
5672 } else {
5673 locations->SetInAt(0, Location::RequiresRegister());
5674 }
5675
5676 if (Primitive::IsFloatingPointType(result_type)) {
5677 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5678 } else {
5679 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5680 }
5681 }
5682
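// Illustrative examples of the conversions handled below (a sketch; register numbers are
// arbitrary):
//   long  -> int:    mov w0, w1                (truncation via the W view of the register)
//   int   -> char:   ubfx w0, w1, #0, #16      (zero-extend the low 16 bits)
//   int   -> byte:   sbfx w0, w1, #0, #8       (sign-extend the low 8 bits)
//   int   -> float:  scvtf s0, w1
//   float -> int:    fcvtzs w0, s0
//   float -> double: fcvt d0, s0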
5683 void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) {
5684 Primitive::Type result_type = conversion->GetResultType();
5685 Primitive::Type input_type = conversion->GetInputType();
5686
5687 DCHECK_NE(input_type, result_type);
5688
5689 if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
5690 int result_size = Primitive::ComponentSize(result_type);
5691 int input_size = Primitive::ComponentSize(input_type);
5692 int min_size = std::min(result_size, input_size);
5693 Register output = OutputRegister(conversion);
5694 Register source = InputRegisterAt(conversion, 0);
5695 if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
5696 // 'int' values are used directly as W registers, discarding the top
5697 // bits, so we don't need to sign-extend and can just perform a move.
5698 // We do not pass the `kDiscardForSameWReg` argument to force clearing the
5699 // top 32 bits of the target register. We theoretically could leave those
5700 // bits unchanged, but we would have to make sure that no code uses a
5701 // 32bit input value as a 64bit value assuming that the top 32 bits are
5702 // zero.
5703 __ Mov(output.W(), source.W());
5704 } else if (result_type == Primitive::kPrimChar ||
5705 (input_type == Primitive::kPrimChar && input_size < result_size)) {
5706 __ Ubfx(output,
5707 output.IsX() ? source.X() : source.W(),
5708 0, Primitive::ComponentSize(Primitive::kPrimChar) * kBitsPerByte);
5709 } else {
5710 __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
5711 }
5712 } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
5713 __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
5714 } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
5715 CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
5716 __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
5717 } else if (Primitive::IsFloatingPointType(result_type) &&
5718 Primitive::IsFloatingPointType(input_type)) {
5719 __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
5720 } else {
5721 LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
5722 << " to " << result_type;
5723 }
5724 }
5725
5726 void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
5727 HandleShift(ushr);
5728 }
5729
5730 void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
5731 HandleShift(ushr);
5732 }
5733
5734 void LocationsBuilderARM64::VisitXor(HXor* instruction) {
5735 HandleBinaryOp(instruction);
5736 }
5737
5738 void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
5739 HandleBinaryOp(instruction);
5740 }
5741
5742 void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
5743 // Nothing to do, this should be removed during prepare for register allocator.
5744 LOG(FATAL) << "Unreachable";
5745 }
5746
5747 void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
5748 // Nothing to do, this should be removed during prepare for register allocator.
5749 LOG(FATAL) << "Unreachable";
5750 }
5751
5752 // Simple implementation of packed switch - generate cascaded compare/jumps.
5753 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
5754 LocationSummary* locations =
5755 new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
5756 locations->SetInAt(0, Location::RequiresRegister());
5757 }
5758
5759 void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
5760 int32_t lower_bound = switch_instr->GetStartValue();
5761 uint32_t num_entries = switch_instr->GetNumEntries();
5762 Register value_reg = InputRegisterAt(switch_instr, 0);
5763 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
5764
5765   // Roughly set 16 as the maximum average number of instructions generated per HIR in a graph.
5766 static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
5767   // ADR has a limited range (+/- 1 MB), so we set a threshold for the number of HIRs in the graph to
5768 // make sure we don't emit it if the target may run out of range.
5769 // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
5770 // ranges and emit the tables only as required.
5771 static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction;
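  // With kInstructionSize == 4 bytes this gives 16 * 4 = 64 bytes per HIR, i.e. a threshold of
  // 1 MB / 64 B = 16384 HIR instructions (a rough back-of-the-envelope bound, not a hard limit).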
5772
5773 if (num_entries <= kPackedSwitchCompareJumpThreshold ||
5774 // Current instruction id is an upper bound of the number of HIRs in the graph.
5775 GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
5776 // Create a series of compare/jumps.
5777 UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
5778 Register temp = temps.AcquireW();
5779 __ Subs(temp, value_reg, Operand(lower_bound));
5780
5781 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
5782 // Jump to successors[0] if value == lower_bound.
5783 __ B(eq, codegen_->GetLabelOf(successors[0]));
5784 int32_t last_index = 0;
5785 for (; num_entries - last_index > 2; last_index += 2) {
5786 __ Subs(temp, temp, Operand(2));
5787 // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
5788 __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
5789 // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
5790 __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
5791 }
5792 if (num_entries - last_index == 2) {
5793 // The last missing case_value.
5794 __ Cmp(temp, Operand(1));
5795 __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
5796 }
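    // Worked example (a sketch): for lower_bound == 0 and num_entries == 5 the cascade above is
    // roughly
    //   subs wT, wV, #0   ; b.eq succ[0]
    //   subs wT, wT, #2   ; b.lo succ[1] ; b.eq succ[2]
    //   subs wT, wT, #2   ; b.lo succ[3] ; b.eq succ[4]
    // followed by the branch to the default block below, which matches the
    // ~(1.5 * num_entries + 3) estimate used for kPackedSwitchCompareJumpThreshold.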
5797
5798 // And the default for any other value.
5799 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
5800 __ B(codegen_->GetLabelOf(default_block));
5801 }
5802 } else {
5803 JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr);
5804
5805 UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
5806
5807     // The instructions below should use at most one blocked register. Since there are two
5808     // blocked registers, we are free to block one.
5809 Register temp_w = temps.AcquireW();
5810 Register index;
5811 // Remove the bias.
5812 if (lower_bound != 0) {
5813 index = temp_w;
5814 __ Sub(index, value_reg, Operand(lower_bound));
5815 } else {
5816 index = value_reg;
5817 }
5818
5819     // Jump to the default block if the index is out of range.
5820 __ Cmp(index, Operand(num_entries));
5821 __ B(hs, codegen_->GetLabelOf(default_block));
5822
5823     // In the current VIXL implementation, encoding the immediate value for Adr does not
5824     // require any blocked registers, so we are free to use both VIXL blocked registers to
5825     // reduce register pressure.
5826 Register table_base = temps.AcquireX();
5827 // Load jump offset from the table.
5828 __ Adr(table_base, jump_table->GetTableStartLabel());
5829 Register jump_offset = temp_w;
5830 __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
5831
5832     // Jump to the target block by branching to table_base (PC-relative) + offset.
5833 Register target_address = table_base;
5834 __ Add(target_address, table_base, Operand(jump_offset, SXTW));
5835 __ Br(target_address);
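    // The sequence above is roughly: adr xBase, table; ldr wOff, [xBase, wIndex, uxtw #2];
    // add xBase, xBase, wOff, sxtw; br xBase. Each jump table entry is therefore expected to be
    // a 32-bit offset from the table start to the target block (the table itself is emitted by
    // JumpTableARM64 at the end of the code, see the TODO above).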
5836 }
5837 }
5838
5839 void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
5840 HInstruction* instruction,
5841 Location out,
5842 uint32_t offset,
5843 Location maybe_temp,
5844 ReadBarrierOption read_barrier_option) {
5845 Primitive::Type type = Primitive::kPrimNot;
5846 Register out_reg = RegisterFrom(out, type);
5847 if (read_barrier_option == kWithReadBarrier) {
5848 CHECK(kEmitCompilerReadBarrier);
5849 if (kUseBakerReadBarrier) {
5850 // Load with fast path based Baker's read barrier.
5851 // /* HeapReference<Object> */ out = *(out + offset)
5852 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
5853 out,
5854 out_reg,
5855 offset,
5856 maybe_temp,
5857 /* needs_null_check */ false,
5858 /* use_load_acquire */ false);
5859 } else {
5860 // Load with slow path based read barrier.
5861 // Save the value of `out` into `maybe_temp` before overwriting it
5862 // in the following move operation, as we will need it for the
5863 // read barrier below.
5864 Register temp_reg = RegisterFrom(maybe_temp, type);
5865 __ Mov(temp_reg, out_reg);
5866 // /* HeapReference<Object> */ out = *(out + offset)
5867 __ Ldr(out_reg, HeapOperand(out_reg, offset));
5868 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
5869 }
5870 } else {
5871 // Plain load with no read barrier.
5872 // /* HeapReference<Object> */ out = *(out + offset)
5873 __ Ldr(out_reg, HeapOperand(out_reg, offset));
5874 GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
5875 }
5876 }
5877
5878 void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
5879 HInstruction* instruction,
5880 Location out,
5881 Location obj,
5882 uint32_t offset,
5883 Location maybe_temp,
5884 ReadBarrierOption read_barrier_option) {
5885 Primitive::Type type = Primitive::kPrimNot;
5886 Register out_reg = RegisterFrom(out, type);
5887 Register obj_reg = RegisterFrom(obj, type);
5888 if (read_barrier_option == kWithReadBarrier) {
5889 CHECK(kEmitCompilerReadBarrier);
5890 if (kUseBakerReadBarrier) {
5891 // Load with fast path based Baker's read barrier.
5892 // /* HeapReference<Object> */ out = *(obj + offset)
5893 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
5894 out,
5895 obj_reg,
5896 offset,
5897 maybe_temp,
5898 /* needs_null_check */ false,
5899 /* use_load_acquire */ false);
5900 } else {
5901 // Load with slow path based read barrier.
5902 // /* HeapReference<Object> */ out = *(obj + offset)
5903 __ Ldr(out_reg, HeapOperand(obj_reg, offset));
5904 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
5905 }
5906 } else {
5907 // Plain load with no read barrier.
5908 // /* HeapReference<Object> */ out = *(obj + offset)
5909 __ Ldr(out_reg, HeapOperand(obj_reg, offset));
5910 GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
5911 }
5912 }
5913
5914 void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
5915 HInstruction* instruction,
5916 Location root,
5917 Register obj,
5918 uint32_t offset,
5919 vixl::aarch64::Label* fixup_label,
5920 ReadBarrierOption read_barrier_option) {
5921 DCHECK(fixup_label == nullptr || offset == 0u);
5922 Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
5923 if (read_barrier_option == kWithReadBarrier) {
5924 DCHECK(kEmitCompilerReadBarrier);
5925 if (kUseBakerReadBarrier) {
5926 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
5927 // Baker's read barrier are used.
5928 if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
5929 !Runtime::Current()->UseJitCompilation()) {
5930 // Note that we do not actually check the value of `GetIsGcMarking()`
5931 // to decide whether to mark the loaded GC root or not. Instead, we
5932 // load into `temp` the read barrier mark introspection entrypoint.
5933 // If `temp` is null, it means that `GetIsGcMarking()` is false, and
5934 // vice versa.
5935 //
5936 // We use link-time generated thunks for the slow path. That thunk
5937 // checks the reference and jumps to the entrypoint if needed.
5938 //
5939 // temp = Thread::Current()->pReadBarrierMarkIntrospection
5940 // lr = &return_address;
5941 // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
5942 // if (temp != nullptr) {
5943 // goto gc_root_thunk<root_reg>(lr)
5944 // }
5945 // return_address:
5946
5947 UseScratchRegisterScope temps(GetVIXLAssembler());
5948 DCHECK(temps.IsAvailable(ip0));
5949 DCHECK(temps.IsAvailable(ip1));
5950 temps.Exclude(ip0, ip1);
5951 uint32_t custom_data =
5952 linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
5953 vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data);
5954
5955 // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
5956 DCHECK_EQ(ip0.GetCode(), 16u);
5957 const int32_t entry_point_offset =
5958 CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
5959 __ Ldr(ip1, MemOperand(tr, entry_point_offset));
5960 EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
5961 vixl::aarch64::Label return_address;
5962 __ adr(lr, &return_address);
5963 if (fixup_label != nullptr) {
5964 __ Bind(fixup_label);
5965 }
5966 static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
5967 "GC root LDR must be 2 instruction (8B) before the return address label.");
5968 __ ldr(root_reg, MemOperand(obj.X(), offset));
5969 __ Bind(cbnz_label);
5970 __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
5971 __ Bind(&return_address);
5972 } else {
5973 // Note that we do not actually check the value of
5974 // `GetIsGcMarking()` to decide whether to mark the loaded GC
5975 // root or not. Instead, we load into `temp` the read barrier
5976 // mark entry point corresponding to register `root`. If `temp`
5977 // is null, it means that `GetIsGcMarking()` is false, and vice
5978 // versa.
5979 //
5980 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
5981 // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
5982 // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
5983 // // Slow path.
5984 // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
5985 // }
5986
5987 // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
5988 Register temp = lr;
5989 SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(
5990 instruction, root, /* entrypoint */ LocationFrom(temp));
5991 codegen_->AddSlowPath(slow_path);
5992
5993 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
5994 const int32_t entry_point_offset =
5995 CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg());
5996 // Loading the entrypoint does not require a load acquire since it is only changed when
5997 // threads are suspended or running a checkpoint.
5998 __ Ldr(temp, MemOperand(tr, entry_point_offset));
5999
6000 // /* GcRoot<mirror::Object> */ root = *(obj + offset)
6001 if (fixup_label == nullptr) {
6002 __ Ldr(root_reg, MemOperand(obj, offset));
6003 } else {
6004 codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
6005 }
6006 static_assert(
6007 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
6008 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
6009 "have different sizes.");
6010 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
6011 "art::mirror::CompressedReference<mirror::Object> and int32_t "
6012 "have different sizes.");
6013
6014       // The entrypoint is null when the GC is not marking; this prevents one load compared to
6015 // checking GetIsGcMarking.
6016 __ Cbnz(temp, slow_path->GetEntryLabel());
6017 __ Bind(slow_path->GetExitLabel());
6018 }
6019 } else {
6020 // GC root loaded through a slow path for read barriers other
6021 // than Baker's.
6022 // /* GcRoot<mirror::Object>* */ root = obj + offset
6023 if (fixup_label == nullptr) {
6024 __ Add(root_reg.X(), obj.X(), offset);
6025 } else {
6026 codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
6027 }
6028 // /* mirror::Object* */ root = root->Read()
6029 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
6030 }
6031 } else {
6032 // Plain GC root load with no read barrier.
6033 // /* GcRoot<mirror::Object> */ root = *(obj + offset)
6034 if (fixup_label == nullptr) {
6035 __ Ldr(root_reg, MemOperand(obj, offset));
6036 } else {
6037 codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
6038 }
6039 // Note that GC roots are not affected by heap poisoning, thus we
6040 // do not have to unpoison `root_reg` here.
6041 }
6042 }
6043
6044 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6045 Location ref,
6046 Register obj,
6047 uint32_t offset,
6048 Location maybe_temp,
6049 bool needs_null_check,
6050 bool use_load_acquire) {
6051 DCHECK(kEmitCompilerReadBarrier);
6052 DCHECK(kUseBakerReadBarrier);
6053
6054 if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
6055 !use_load_acquire &&
6056 !Runtime::Current()->UseJitCompilation()) {
6057 // Note that we do not actually check the value of `GetIsGcMarking()`
6058 // to decide whether to mark the loaded GC root or not. Instead, we
6059 // load into `temp` the read barrier mark introspection entrypoint.
6060 // If `temp` is null, it means that `GetIsGcMarking()` is false, and
6061 // vice versa.
6062 //
6063 // We use link-time generated thunks for the slow path. That thunk checks
6064 // the holder and jumps to the entrypoint if needed. If the holder is not
6065 // gray, it creates a fake dependency and returns to the LDR instruction.
6066 //
6067 // temp = Thread::Current()->pReadBarrierMarkIntrospection
6068 // lr = &return_address;
6069 // if (temp != nullptr) {
6070 // goto field_thunk<holder_reg, base_reg>(lr)
6071 // }
6072 // not_gray_return_address:
6073 // // Original reference load. If the offset is too large to fit
6074 // // into LDR, we use an adjusted base register here.
6075 // GcRoot<mirror::Object> root = *(obj+offset);
6076 // gray_return_address:
6077
6078 DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
6079 Register base = obj;
6080 if (offset >= kReferenceLoadMinFarOffset) {
6081 DCHECK(maybe_temp.IsRegister());
6082 base = WRegisterFrom(maybe_temp);
6083 static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
6084 __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
6085 offset &= (kReferenceLoadMinFarOffset - 1u);
6086 }
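    // For example (illustrative numbers): offset == 0x4A30 becomes
    //   add wBase, wObj, #0x4000   // offset & ~(kReferenceLoadMinFarOffset - 1)
    // with the remaining offset 0xA30 (< 16 KiB) used in the LDR below, so the reference load
    // stays a single instruction with an encodable immediate.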
6087 UseScratchRegisterScope temps(GetVIXLAssembler());
6088 DCHECK(temps.IsAvailable(ip0));
6089 DCHECK(temps.IsAvailable(ip1));
6090 temps.Exclude(ip0, ip1);
6091 uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(
6092 base.GetCode(),
6093 obj.GetCode());
6094 vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
6095
6096 // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
6097 DCHECK_EQ(ip0.GetCode(), 16u);
6098 const int32_t entry_point_offset =
6099 CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
6100 __ Ldr(ip1, MemOperand(tr, entry_point_offset));
6101 EmissionCheckScope guard(GetVIXLAssembler(),
6102 (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
6103 vixl::aarch64::Label return_address;
6104 __ adr(lr, &return_address);
6105 __ Bind(cbnz_label);
6106 __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
6107 static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
6108 "Field LDR must be 1 instruction (4B) before the return address label; "
6109 " 2 instructions (8B) for heap poisoning.");
6110 Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
6111 __ ldr(ref_reg, MemOperand(base.X(), offset));
6112 if (needs_null_check) {
6113 MaybeRecordImplicitNullCheck(instruction);
6114 }
6115 GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
6116 __ Bind(&return_address);
6117 return;
6118 }
6119
6120 // /* HeapReference<Object> */ ref = *(obj + offset)
6121 Register temp = WRegisterFrom(maybe_temp);
6122 Location no_index = Location::NoLocation();
6123 size_t no_scale_factor = 0u;
6124 GenerateReferenceLoadWithBakerReadBarrier(instruction,
6125 ref,
6126 obj,
6127 offset,
6128 no_index,
6129 no_scale_factor,
6130 temp,
6131 needs_null_check,
6132 use_load_acquire);
6133 }
6134
6135 void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
6136 Location ref,
6137 Register obj,
6138 uint32_t data_offset,
6139 Location index,
6140 Register temp,
6141 bool needs_null_check) {
6142 DCHECK(kEmitCompilerReadBarrier);
6143 DCHECK(kUseBakerReadBarrier);
6144
6145   // Array cells are never volatile variables; therefore, array loads
6146 // never use Load-Acquire instructions on ARM64.
6147 const bool use_load_acquire = false;
6148
6149 static_assert(
6150 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6151 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6152 // /* HeapReference<Object> */ ref =
6153 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
6154 size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
6155 GenerateReferenceLoadWithBakerReadBarrier(instruction,
6156 ref,
6157 obj,
6158 data_offset,
6159 index,
6160 scale_factor,
6161 temp,
6162 needs_null_check,
6163 use_load_acquire);
6164 }
6165
6166 void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
6167 Location ref,
6168 Register obj,
6169 uint32_t offset,
6170 Location index,
6171 size_t scale_factor,
6172 Register temp,
6173 bool needs_null_check,
6174 bool use_load_acquire,
6175 bool always_update_field) {
6176 DCHECK(kEmitCompilerReadBarrier);
6177 DCHECK(kUseBakerReadBarrier);
6178 // If we are emitting an array load, we should not be using a
6179 // Load Acquire instruction. In other words:
6180 // `instruction->IsArrayGet()` => `!use_load_acquire`.
6181 DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
6182
6183 // Query `art::Thread::Current()->GetIsGcMarking()` to decide
6184 // whether we need to enter the slow path to mark the reference.
6185 // Then, in the slow path, check the gray bit in the lock word of
6186 // the reference's holder (`obj`) to decide whether to mark `ref` or
6187 // not.
6188 //
6189 // Note that we do not actually check the value of `GetIsGcMarking()`;
6190 // instead, we load into `temp2` the read barrier mark entry point
6191 // corresponding to register `ref`. If `temp2` is null, it means
6192 // that `GetIsGcMarking()` is false, and vice versa.
6193 //
6194 // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
6195 // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
6196 // // Slow path.
6197 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
6198 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
6199 // HeapReference<mirror::Object> ref = *src; // Original reference load.
6200 // bool is_gray = (rb_state == ReadBarrier::GrayState());
6201 // if (is_gray) {
6202 // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
6203 // }
6204 // } else {
6205 // HeapReference<mirror::Object> ref = *src; // Original reference load.
6206 // }
6207
6208 // Slow path marking the object `ref` when the GC is marking. The
6209 // entrypoint will already be loaded in `temp2`.
6210 Register temp2 = lr;
6211 Location temp2_loc = LocationFrom(temp2);
6212 SlowPathCodeARM64* slow_path;
6213 if (always_update_field) {
6214 // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
6215 // only supports address of the form `obj + field_offset`, where
6216 // `obj` is a register and `field_offset` is a register. Thus
6217     // `offset` and `scale_factor` above are expected to be zero in
6218 // this code path.
6219 DCHECK_EQ(offset, 0u);
6220 DCHECK_EQ(scale_factor, 0u); /* "times 1" */
6221 Location field_offset = index;
6222 slow_path =
6223 new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
6224 instruction,
6225 ref,
6226 obj,
6227 offset,
6228 /* index */ field_offset,
6229 scale_factor,
6230 needs_null_check,
6231 use_load_acquire,
6232 temp,
6233 /* entrypoint */ temp2_loc);
6234 } else {
6235 slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
6236 instruction,
6237 ref,
6238 obj,
6239 offset,
6240 index,
6241 scale_factor,
6242 needs_null_check,
6243 use_load_acquire,
6244 temp,
6245 /* entrypoint */ temp2_loc);
6246 }
6247 AddSlowPath(slow_path);
6248
6249 // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
6250 const int32_t entry_point_offset =
6251 CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg());
6252 // Loading the entrypoint does not require a load acquire since it is only changed when
6253 // threads are suspended or running a checkpoint.
6254 __ Ldr(temp2, MemOperand(tr, entry_point_offset));
6255   // The entrypoint is null when the GC is not marking; this prevents one load compared to
6256 // checking GetIsGcMarking.
6257 __ Cbnz(temp2, slow_path->GetEntryLabel());
6258 // Fast path: just load the reference.
6259 GenerateRawReferenceLoad(
6260 instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
6261 __ Bind(slow_path->GetExitLabel());
6262 }
6263
6264 void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
6265 Location ref,
6266 Register obj,
6267 uint32_t offset,
6268 Location index,
6269 size_t scale_factor,
6270 bool needs_null_check,
6271 bool use_load_acquire) {
6272 DCHECK(obj.IsW());
6273 Primitive::Type type = Primitive::kPrimNot;
6274 Register ref_reg = RegisterFrom(ref, type);
6275
6276 // If needed, vixl::EmissionCheckScope guards are used to ensure
6277 // that no pools are emitted between the load (macro) instruction
6278 // and MaybeRecordImplicitNullCheck.
6279
6280 if (index.IsValid()) {
6281 // Load types involving an "index": ArrayGet,
6282 // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
6283 // intrinsics.
6284 if (use_load_acquire) {
6285 // UnsafeGetObjectVolatile intrinsic case.
6286 // Register `index` is not an index in an object array, but an
6287 // offset to an object reference field within object `obj`.
6288 DCHECK(instruction->IsInvoke()) << instruction->DebugName();
6289 DCHECK(instruction->GetLocations()->Intrinsified());
6290 DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)
6291 << instruction->AsInvoke()->GetIntrinsic();
6292 DCHECK_EQ(offset, 0u);
6293 DCHECK_EQ(scale_factor, 0u);
6294 DCHECK_EQ(needs_null_check, false);
6295 // /* HeapReference<mirror::Object> */ ref = *(obj + index)
6296 MemOperand field = HeapOperand(obj, XRegisterFrom(index));
6297 LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
6298 } else {
6299 // ArrayGet and UnsafeGetObject and UnsafeCASObject intrinsics cases.
6300 // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
6301 if (index.IsConstant()) {
6302 uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
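        // E.g. (hypothetical values): offset == 12, constant index == 3, scale_factor == 2
        // gives computed_offset == 12 + (3 << 2) == 24.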
6303 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6304 Load(type, ref_reg, HeapOperand(obj, computed_offset));
6305 if (needs_null_check) {
6306 MaybeRecordImplicitNullCheck(instruction);
6307 }
6308 } else {
6309 UseScratchRegisterScope temps(GetVIXLAssembler());
6310 Register temp = temps.AcquireW();
6311 __ Add(temp, obj, offset);
6312 {
6313 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6314 Load(type, ref_reg, HeapOperand(temp, XRegisterFrom(index), LSL, scale_factor));
6315 if (needs_null_check) {
6316 MaybeRecordImplicitNullCheck(instruction);
6317 }
6318 }
6319 }
6320 }
6321 } else {
6322 // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
6323 MemOperand field = HeapOperand(obj, offset);
6324 if (use_load_acquire) {
6325 // Implicit null checks are handled by CodeGeneratorARM64::LoadAcquire.
6326 LoadAcquire(instruction, ref_reg, field, needs_null_check);
6327 } else {
6328 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6329 Load(type, ref_reg, field);
6330 if (needs_null_check) {
6331 MaybeRecordImplicitNullCheck(instruction);
6332 }
6333 }
6334 }
6335
6336 // Object* ref = ref_addr->AsMirrorPtr()
6337 GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
6338 }
6339
6340 void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
6341 Location out,
6342 Location ref,
6343 Location obj,
6344 uint32_t offset,
6345 Location index) {
6346 DCHECK(kEmitCompilerReadBarrier);
6347
6348 // Insert a slow path based read barrier *after* the reference load.
6349 //
6350 // If heap poisoning is enabled, the unpoisoning of the loaded
6351 // reference will be carried out by the runtime within the slow
6352 // path.
6353 //
6354 // Note that `ref` currently does not get unpoisoned (when heap
6355 // poisoning is enabled), which is alright as the `ref` argument is
6356 // not used by the artReadBarrierSlow entry point.
6357 //
6358 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6359 SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
6360 ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
6361 AddSlowPath(slow_path);
6362
6363 __ B(slow_path->GetEntryLabel());
6364 __ Bind(slow_path->GetExitLabel());
6365 }
6366
6367 void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6368 Location out,
6369 Location ref,
6370 Location obj,
6371 uint32_t offset,
6372 Location index) {
6373 if (kEmitCompilerReadBarrier) {
6374 // Baker's read barriers shall be handled by the fast path
6375 // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
6376 DCHECK(!kUseBakerReadBarrier);
6377 // If heap poisoning is enabled, unpoisoning will be taken care of
6378 // by the runtime within the slow path.
6379 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6380 } else if (kPoisonHeapReferences) {
6381 GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
6382 }
6383 }
6384
6385 void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6386 Location out,
6387 Location root) {
6388 DCHECK(kEmitCompilerReadBarrier);
6389
6390 // Insert a slow path based read barrier *after* the GC root load.
6391 //
6392 // Note that GC roots are not affected by heap poisoning, so we do
6393 // not need to do anything special for this here.
6394 SlowPathCodeARM64* slow_path =
6395 new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
6396 AddSlowPath(slow_path);
6397
6398 __ B(slow_path->GetEntryLabel());
6399 __ Bind(slow_path->GetExitLabel());
6400 }
6401
6402 void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
6403 LocationSummary* locations =
6404 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
6405 locations->SetInAt(0, Location::RequiresRegister());
6406 locations->SetOut(Location::RequiresRegister());
6407 }
6408
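// Sketch of the code generated below (symbolic offsets, for readability only):
//   vtable kind: ldr xOut, [xClass, #embedded_vtable_entry_offset(index)]
//   IMT kind:    ldr xOut, [xClass, #imt_ptr_offset]
//                ldr xOut, [xOut, #imt_element_offset(index)]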
6409 void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
6410 LocationSummary* locations = instruction->GetLocations();
6411 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
6412 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
6413 instruction->GetIndex(), kArm64PointerSize).SizeValue();
6414 __ Ldr(XRegisterFrom(locations->Out()),
6415 MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
6416 } else {
6417 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
6418 instruction->GetIndex(), kArm64PointerSize));
6419 __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
6420 mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
6421 __ Ldr(XRegisterFrom(locations->Out()),
6422 MemOperand(XRegisterFrom(locations->Out()), method_offset));
6423 }
6424 }
6425
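// Patches a JIT root literal with the address of its entry in the JIT roots table. For example,
// with index_in_table == 2 and sizeof(GcRoot<mirror::Object>) == 4, the 32-bit value written at
// code + literal_offset is roots_data + 8, narrowed to 32 bits (checked) by
// dchecked_integral_cast.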
6426 static void PatchJitRootUse(uint8_t* code,
6427 const uint8_t* roots_data,
6428 vixl::aarch64::Literal<uint32_t>* literal,
6429 uint64_t index_in_table) {
6430 uint32_t literal_offset = literal->GetOffset();
6431 uintptr_t address =
6432 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
6433 uint8_t* data = code + literal_offset;
6434 reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
6435 }
6436
6437 void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
6438 for (const auto& entry : jit_string_patches_) {
6439 const auto& it = jit_string_roots_.find(entry.first);
6440 DCHECK(it != jit_string_roots_.end());
6441 PatchJitRootUse(code, roots_data, entry.second, it->second);
6442 }
6443 for (const auto& entry : jit_class_patches_) {
6444 const auto& it = jit_class_roots_.find(entry.first);
6445 DCHECK(it != jit_class_roots_.end());
6446 PatchJitRootUse(code, roots_data, entry.second, it->second);
6447 }
6448 }
6449
6450 #undef __
6451 #undef QUICK_ENTRY_POINT
6452
6453 } // namespace arm64
6454 } // namespace art
6455