1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_arm64.h"
18 
19 #include "arch/arm64/asm_support_arm64.h"
20 #include "arch/arm64/instruction_set_features_arm64.h"
21 #include "art_method.h"
22 #include "code_generator_utils.h"
23 #include "compiled_method.h"
24 #include "entrypoints/quick/quick_entrypoints.h"
25 #include "entrypoints/quick/quick_entrypoints_enum.h"
26 #include "gc/accounting/card_table.h"
27 #include "intrinsics.h"
28 #include "intrinsics_arm64.h"
29 #include "linker/arm64/relative_patcher_arm64.h"
30 #include "mirror/array-inl.h"
31 #include "mirror/class-inl.h"
32 #include "offsets.h"
33 #include "thread.h"
34 #include "utils/arm64/assembler_arm64.h"
35 #include "utils/assembler.h"
36 #include "utils/stack_checks.h"
37 
38 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
39 using vixl::ExactAssemblyScope;
40 using vixl::CodeBufferCheckScope;
41 using vixl::EmissionCheckScope;
42 
43 #ifdef __
44 #error "ARM64 Codegen VIXL macro-assembler macro already defined."
45 #endif
46 
47 namespace art {
48 
49 template<class MirrorType>
50 class GcRoot;
51 
52 namespace arm64 {
53 
54 using helpers::ARM64EncodableConstantOrRegister;
55 using helpers::ArtVixlRegCodeCoherentForRegSet;
56 using helpers::CPURegisterFrom;
57 using helpers::DRegisterFrom;
58 using helpers::FPRegisterFrom;
59 using helpers::HeapOperand;
60 using helpers::HeapOperandFrom;
61 using helpers::InputCPURegisterAt;
62 using helpers::InputCPURegisterOrZeroRegAt;
63 using helpers::InputFPRegisterAt;
64 using helpers::InputOperandAt;
65 using helpers::InputRegisterAt;
66 using helpers::Int64ConstantFrom;
67 using helpers::IsConstantZeroBitPattern;
68 using helpers::LocationFrom;
69 using helpers::OperandFromMemOperand;
70 using helpers::OutputCPURegister;
71 using helpers::OutputFPRegister;
72 using helpers::OutputRegister;
73 using helpers::QRegisterFrom;
74 using helpers::RegisterFrom;
75 using helpers::StackOperandFrom;
76 using helpers::VIXLRegCodeFromART;
77 using helpers::WRegisterFrom;
78 using helpers::XRegisterFrom;
79 
80 static constexpr int kCurrentMethodStackOffset = 0;
81 // The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the
82 // jump table version generates 7 instructions and num_entries literals. The compare/jump sequence
83 // generates less code/data for a small num_entries.
84 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
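// For instance, at the threshold of 7 entries the compare/jump sequence costs roughly
// 1.5 * 7 + 3 = 13.5 instructions, whereas the jump table costs 7 instructions plus
// 7 * sizeof(int32_t) bytes of literals, which is roughly where the table starts to be worthwhile.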
85 
86 // Reference loads (except object array loads) use LDR Wt, [Xn, #offset], which can handle
87 // offsets < 16KiB. For offsets >= 16KiB, the load must be emitted as two or more instructions.
88 // For the Baker read barrier implementation using link-time generated thunks we need to split
89 // the offset explicitly.
90 constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
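// As a sketch (not the exact sequence emitted elsewhere in this file): a reference field at
// offset 0x4004 is not encodable in a single LDR Wt, [Xn, #offset], so it would be materialized
// as something like ADD temp, base, #0x4000 followed by LDR Wt, [temp, #0x4].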
91 
92 // Flags controlling the use of link-time generated thunks for Baker read barriers.
93 constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
94 constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
95 
96 // Some instructions have special requirements for a temporary, for example
97 // LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
98 // a temp that's not R0 (to avoid an extra move) and Baker read barrier field
99 // loads with large offsets need a fixed register to limit the number of link-time
100 // thunks we generate. For these and similar cases, we want to reserve a specific
101 // register that's neither callee-save nor an argument register. We choose x15.
102 inline Location FixedTempLocation() {
103   return Location::RegisterLocation(x15.GetCode());
104 }
105 
106 inline Condition ARM64Condition(IfCondition cond) {
107   switch (cond) {
108     case kCondEQ: return eq;
109     case kCondNE: return ne;
110     case kCondLT: return lt;
111     case kCondLE: return le;
112     case kCondGT: return gt;
113     case kCondGE: return ge;
114     case kCondB:  return lo;
115     case kCondBE: return ls;
116     case kCondA:  return hi;
117     case kCondAE: return hs;
118   }
119   LOG(FATAL) << "Unreachable";
120   UNREACHABLE();
121 }
122 
123 inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
124   // The ARM64 condition codes can express all the necessary branches, see the
125   // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
126   // There is no dex instruction or HIR that would need the missing conditions
127   // "equal or unordered" or "not equal".
128   switch (cond) {
129     case kCondEQ: return eq;
130     case kCondNE: return ne /* unordered */;
131     case kCondLT: return gt_bias ? cc : lt /* unordered */;
132     case kCondLE: return gt_bias ? ls : le /* unordered */;
133     case kCondGT: return gt_bias ? hi /* unordered */ : gt;
134     case kCondGE: return gt_bias ? cs /* unordered */ : ge;
135     default:
136       LOG(FATAL) << "UNREACHABLE";
137       UNREACHABLE();
138   }
139 }
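// Informally: after an FP compare, an unordered (NaN) result sets the C and V flags, so e.g.
// `lt` (N != V) is also taken for unordered inputs while `lo`/cc (C clear) is not. This is why
// kCondLT maps to `cc` under gt_bias (NaN must not take the "less than" branch) and to `lt`
// otherwise.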
140 
141 Location ARM64ReturnLocation(Primitive::Type return_type) {
142   // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
143   // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
144   // but we use the exact registers for clarity.
145   if (return_type == Primitive::kPrimFloat) {
146     return LocationFrom(s0);
147   } else if (return_type == Primitive::kPrimDouble) {
148     return LocationFrom(d0);
149   } else if (return_type == Primitive::kPrimLong) {
150     return LocationFrom(x0);
151   } else if (return_type == Primitive::kPrimVoid) {
152     return Location::NoLocation();
153   } else {
154     return LocationFrom(w0);
155   }
156 }
157 
158 Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type return_type) {
159   return ARM64ReturnLocation(return_type);
160 }
161 
162 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
163 #define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->  // NOLINT
164 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
165 
166 // Calculate memory accessing operand for save/restore live registers.
167 static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
168                                            LocationSummary* locations,
169                                            int64_t spill_offset,
170                                            bool is_save) {
171   const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
172   const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
173   DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills,
174                                          codegen->GetNumberOfCoreRegisters(),
175                                          fp_spills,
176                                          codegen->GetNumberOfFloatingPointRegisters()));
177 
178   CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
179   unsigned v_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSize : kDRegSize;
180   CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size, fp_spills);
181 
182   MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
183   UseScratchRegisterScope temps(masm);
184 
185   Register base = masm->StackPointer();
186   int64_t core_spill_size = core_list.GetTotalSizeInBytes();
187   int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
188   int64_t reg_size = kXRegSizeInBytes;
189   int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
190   uint32_t ls_access_size = WhichPowerOf2(reg_size);
191   if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
192       !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
193     // If the offset does not fit in the instruction's immediate field, use an alternate register
194     // to compute the base address (i.e. the base address for the floating point register spills).
195     Register new_base = temps.AcquireSameSizeAs(base);
196     __ Add(new_base, base, Operand(spill_offset + core_spill_size));
197     base = new_base;
198     spill_offset = -core_spill_size;
199     int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
200     DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
201     DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
202   }
203 
204   if (is_save) {
205     __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
206     __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
207   } else {
208     __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
209     __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
210   }
211 }
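// Illustrative example of the re-basing above (made-up numbers): with spill_offset == 496,
// core_spill_size == 32 and fp_spill_size == 32, max_ls_pair_offset is 496 + 64 - 16 == 544,
// which is outside the scaled 7-bit STP/LDP pair immediate range. The helper then rebases to
// new_base == SP + 528 and stores the core list at offset -32 and the FP list at offset 0.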
212 
213 void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
214   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
215   const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
216   for (uint32_t i : LowToHighBits(core_spills)) {
217     // If the register holds an object, update the stack mask.
218     if (locations->RegisterContainsObject(i)) {
219       locations->SetStackBit(stack_offset / kVRegSize);
220     }
221     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
222     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
223     saved_core_stack_offsets_[i] = stack_offset;
224     stack_offset += kXRegSizeInBytes;
225   }
226 
227   const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
228   for (uint32_t i : LowToHighBits(fp_spills)) {
229     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
230     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
231     saved_fpu_stack_offsets_[i] = stack_offset;
232     stack_offset += kDRegSizeInBytes;
233   }
234 
235   SaveRestoreLiveRegistersHelper(codegen,
236                                  locations,
237                                  codegen->GetFirstRegisterSlotInSlowPath(), true /* is_save */);
238 }
239 
240 void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
241   SaveRestoreLiveRegistersHelper(codegen,
242                                  locations,
243                                  codegen->GetFirstRegisterSlotInSlowPath(), false /* is_save */);
244 }
245 
246 class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
247  public:
248   explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}
249 
250   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
251     LocationSummary* locations = instruction_->GetLocations();
252     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
253 
254     __ Bind(GetEntryLabel());
255     if (instruction_->CanThrowIntoCatchBlock()) {
256       // Live registers will be restored in the catch block if caught.
257       SaveLiveRegisters(codegen, instruction_->GetLocations());
258     }
259     // We're moving two locations to locations that could overlap, so we need a parallel
260     // move resolver.
261     InvokeRuntimeCallingConvention calling_convention;
262     codegen->EmitParallelMoves(
263         locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt,
264         locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt);
265     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
266         ? kQuickThrowStringBounds
267         : kQuickThrowArrayBounds;
268     arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
269     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
270     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
271   }
272 
273   bool IsFatal() const OVERRIDE { return true; }
274 
275   const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; }
276 
277  private:
278   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
279 };
280 
281 class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
282  public:
283   explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}
284 
285   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
286     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
287     __ Bind(GetEntryLabel());
288     arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
289     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
290   }
291 
292   bool IsFatal() const OVERRIDE { return true; }
293 
294   const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; }
295 
296  private:
297   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
298 };
299 
300 class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
301  public:
302   LoadClassSlowPathARM64(HLoadClass* cls,
303                          HInstruction* at,
304                          uint32_t dex_pc,
305                          bool do_clinit,
306                          vixl::aarch64::Register bss_entry_temp = vixl::aarch64::Register(),
307                          vixl::aarch64::Label* bss_entry_adrp_label = nullptr)
308       : SlowPathCodeARM64(at),
309         cls_(cls),
310         dex_pc_(dex_pc),
311         do_clinit_(do_clinit),
312         bss_entry_temp_(bss_entry_temp),
313         bss_entry_adrp_label_(bss_entry_adrp_label) {
314     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
315   }
316 
317   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
318     LocationSummary* locations = instruction_->GetLocations();
319     Location out = locations->Out();
320     constexpr bool call_saves_everything_except_r0_ip0 = (!kUseReadBarrier || kUseBakerReadBarrier);
321     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
322 
323     InvokeRuntimeCallingConvention calling_convention;
324     // For HLoadClass/kBssEntry/kSaveEverything, the page address of the entry is in a temp
325     // register, make sure it's not clobbered by the call or by saving/restoring registers.
326     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
327     bool is_load_class_bss_entry =
328         (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry);
329     if (is_load_class_bss_entry) {
330       DCHECK(bss_entry_temp_.IsValid());
331       DCHECK(!bss_entry_temp_.Is(calling_convention.GetRegisterAt(0)));
332       DCHECK(
333           !UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(bss_entry_temp_));
334     }
335 
336     __ Bind(GetEntryLabel());
337     SaveLiveRegisters(codegen, locations);
338 
339     dex::TypeIndex type_index = cls_->GetTypeIndex();
340     __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
341     QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
342                                                 : kQuickInitializeType;
343     arm64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
344     if (do_clinit_) {
345       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
346     } else {
347       CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
348     }
349 
350     // Move the class to the desired location.
351     if (out.IsValid()) {
352       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
353       Primitive::Type type = instruction_->GetType();
354       arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
355     }
356     RestoreLiveRegisters(codegen, locations);
357     // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
358     if (is_load_class_bss_entry) {
359       DCHECK(out.IsValid());
360       const DexFile& dex_file = cls_->GetDexFile();
361       if (call_saves_everything_except_r0_ip0) {
362         // The class entry page address was preserved in bss_entry_temp_ thanks to kSaveEverything.
363       } else {
364         // For non-Baker read barrier, we need to re-calculate the address of the class entry page.
365         bss_entry_adrp_label_ = arm64_codegen->NewBssEntryTypePatch(dex_file, type_index);
366         arm64_codegen->EmitAdrpPlaceholder(bss_entry_adrp_label_, bss_entry_temp_);
367       }
368       vixl::aarch64::Label* strp_label =
369           arm64_codegen->NewBssEntryTypePatch(dex_file, type_index, bss_entry_adrp_label_);
370       {
371         SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
372         __ Bind(strp_label);
373         __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot),
374                MemOperand(bss_entry_temp_, /* offset placeholder */ 0));
375       }
376     }
377     __ B(GetExitLabel());
378   }
379 
380   const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARM64"; }
381 
382  private:
383   // The class this slow path will load.
384   HLoadClass* const cls_;
385 
386   // The dex PC of `at_`.
387   const uint32_t dex_pc_;
388 
389   // Whether to initialize the class.
390   const bool do_clinit_;
391 
392   // For HLoadClass/kBssEntry, the temp register and the label of the ADRP where it was loaded.
393   vixl::aarch64::Register bss_entry_temp_;
394   vixl::aarch64::Label* bss_entry_adrp_label_;
395 
396   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
397 };
398 
399 class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
400  public:
401   LoadStringSlowPathARM64(HLoadString* instruction, Register temp, vixl::aarch64::Label* adrp_label)
402       : SlowPathCodeARM64(instruction),
403         temp_(temp),
404         adrp_label_(adrp_label) {}
405 
406   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
407     LocationSummary* locations = instruction_->GetLocations();
408     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
409     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
410 
411     InvokeRuntimeCallingConvention calling_convention;
412     // Make sure `temp_` is not clobbered by the call or by saving/restoring registers.
413     DCHECK(temp_.IsValid());
414     DCHECK(!temp_.Is(calling_convention.GetRegisterAt(0)));
415     DCHECK(!UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(temp_));
416 
417     __ Bind(GetEntryLabel());
418     SaveLiveRegisters(codegen, locations);
419 
420     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
421     __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
422     arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
423     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
424     Primitive::Type type = instruction_->GetType();
425     arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
426 
427     RestoreLiveRegisters(codegen, locations);
428 
429     // Store the resolved String to the BSS entry.
430     const DexFile& dex_file = instruction_->AsLoadString()->GetDexFile();
431     if (!kUseReadBarrier || kUseBakerReadBarrier) {
432       // The string entry page address was preserved in temp_ thanks to kSaveEverything.
433     } else {
434       // For non-Baker read barrier, we need to re-calculate the address of the string entry page.
435       adrp_label_ = arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index);
436       arm64_codegen->EmitAdrpPlaceholder(adrp_label_, temp_);
437     }
438     vixl::aarch64::Label* strp_label =
439         arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label_);
440     {
441       SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
442       __ Bind(strp_label);
443       __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot),
444              MemOperand(temp_, /* offset placeholder */ 0));
445     }
446 
447     __ B(GetExitLabel());
448   }
449 
450   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; }
451 
452  private:
453   const Register temp_;
454   vixl::aarch64::Label* adrp_label_;
455 
456   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
457 };
458 
459 class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
460  public:
461   explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}
462 
463   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
464     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
465     __ Bind(GetEntryLabel());
466     if (instruction_->CanThrowIntoCatchBlock()) {
467       // Live registers will be restored in the catch block if caught.
468       SaveLiveRegisters(codegen, instruction_->GetLocations());
469     }
470     arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
471                                  instruction_,
472                                  instruction_->GetDexPc(),
473                                  this);
474     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
475   }
476 
477   bool IsFatal() const OVERRIDE { return true; }
478 
479   const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; }
480 
481  private:
482   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
483 };
484 
485 class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
486  public:
487   SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
488       : SlowPathCodeARM64(instruction), successor_(successor) {}
489 
490   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
491     LocationSummary* locations = instruction_->GetLocations();
492     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
493     __ Bind(GetEntryLabel());
494     SaveLiveRegisters(codegen, locations);  // Only saves live 128-bit regs for SIMD.
495     arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
496     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
497     RestoreLiveRegisters(codegen, locations);  // Only restores live 128-bit regs for SIMD.
498     if (successor_ == nullptr) {
499       __ B(GetReturnLabel());
500     } else {
501       __ B(arm64_codegen->GetLabelOf(successor_));
502     }
503   }
504 
505   vixl::aarch64::Label* GetReturnLabel() {
506     DCHECK(successor_ == nullptr);
507     return &return_label_;
508   }
509 
510   HBasicBlock* GetSuccessor() const {
511     return successor_;
512   }
513 
514   const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM64"; }
515 
516  private:
517   // If not null, the block to branch to after the suspend check.
518   HBasicBlock* const successor_;
519 
520   // If `successor_` is null, the label to branch to after the suspend check.
521   vixl::aarch64::Label return_label_;
522 
523   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
524 };
525 
526 class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
527  public:
528   TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
529       : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}
530 
531   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
532     LocationSummary* locations = instruction_->GetLocations();
533 
534     DCHECK(instruction_->IsCheckCast()
535            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
536     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
537     uint32_t dex_pc = instruction_->GetDexPc();
538 
539     __ Bind(GetEntryLabel());
540 
541     if (!is_fatal_) {
542       SaveLiveRegisters(codegen, locations);
543     }
544 
545     // We're moving two locations to locations that could overlap, so we need a parallel
546     // move resolver.
547     InvokeRuntimeCallingConvention calling_convention;
548     codegen->EmitParallelMoves(locations->InAt(0),
549                                LocationFrom(calling_convention.GetRegisterAt(0)),
550                                Primitive::kPrimNot,
551                                locations->InAt(1),
552                                LocationFrom(calling_convention.GetRegisterAt(1)),
553                                Primitive::kPrimNot);
554     if (instruction_->IsInstanceOf()) {
555       arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
556       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
557       Primitive::Type ret_type = instruction_->GetType();
558       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
559       arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
560     } else {
561       DCHECK(instruction_->IsCheckCast());
562       arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
563       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
564     }
565 
566     if (!is_fatal_) {
567       RestoreLiveRegisters(codegen, locations);
568       __ B(GetExitLabel());
569     }
570   }
571 
572   const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM64"; }
573   bool IsFatal() const OVERRIDE { return is_fatal_; }
574 
575  private:
576   const bool is_fatal_;
577 
578   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
579 };
580 
581 class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
582  public:
583   explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
584       : SlowPathCodeARM64(instruction) {}
585 
586   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
587     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
588     __ Bind(GetEntryLabel());
589     LocationSummary* locations = instruction_->GetLocations();
590     SaveLiveRegisters(codegen, locations);
591     InvokeRuntimeCallingConvention calling_convention;
592     __ Mov(calling_convention.GetRegisterAt(0),
593            static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
594     arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
595     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
596   }
597 
598   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
599 
600  private:
601   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
602 };
603 
604 class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
605  public:
606   explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}
607 
608   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
609     LocationSummary* locations = instruction_->GetLocations();
610     __ Bind(GetEntryLabel());
611     SaveLiveRegisters(codegen, locations);
612 
613     InvokeRuntimeCallingConvention calling_convention;
614     HParallelMove parallel_move(codegen->GetGraph()->GetArena());
615     parallel_move.AddMove(
616         locations->InAt(0),
617         LocationFrom(calling_convention.GetRegisterAt(0)),
618         Primitive::kPrimNot,
619         nullptr);
620     parallel_move.AddMove(
621         locations->InAt(1),
622         LocationFrom(calling_convention.GetRegisterAt(1)),
623         Primitive::kPrimInt,
624         nullptr);
625     parallel_move.AddMove(
626         locations->InAt(2),
627         LocationFrom(calling_convention.GetRegisterAt(2)),
628         Primitive::kPrimNot,
629         nullptr);
630     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
631 
632     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
633     arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
634     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
635     RestoreLiveRegisters(codegen, locations);
636     __ B(GetExitLabel());
637   }
638 
639   const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; }
640 
641  private:
642   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
643 };
644 
645 void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
646   uint32_t num_entries = switch_instr_->GetNumEntries();
647   DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
648 
649   // We are about to use the assembler to place literals directly. Make sure we have enough
650   // underlying code buffer and that the jump table is generated with the right size.
651   EmissionCheckScope scope(codegen->GetVIXLAssembler(),
652                            num_entries * sizeof(int32_t),
653                            CodeBufferCheckScope::kExactSize);
654 
655   __ Bind(&table_start_);
656   const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
657   for (uint32_t i = 0; i < num_entries; i++) {
658     vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
659     DCHECK(target_label->IsBound());
660     ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
661     DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
662     DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
663     Literal<int32_t> literal(jump_offset);
664     __ place(&literal);
665   }
666 }
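// The literals placed above are signed 32-bit offsets from table_start_ to each successor block.
// The dispatch code for the packed switch (emitted elsewhere) is then expected to, roughly:
// materialize the address of table_start_, load the entry selected by the switch value, add it
// to the table address and branch to the result.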
667 
668 // Abstract base class for read barrier slow paths marking a reference
669 // `ref`.
670 //
671 // Argument `entrypoint` must be a register location holding the read
672 // barrier marking runtime entry point to be invoked.
673 class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 {
674  protected:
675   ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint)
676       : SlowPathCodeARM64(instruction), ref_(ref), entrypoint_(entrypoint) {
677     DCHECK(kEmitCompilerReadBarrier);
678   }
679 
680   const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM64"; }
681 
682   // Generate assembly code calling the read barrier marking runtime
683   // entry point (ReadBarrierMarkRegX).
684   void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
685     // No need to save live registers; it's taken care of by the
686     // entrypoint. Also, there is no need to update the stack mask,
687     // as this runtime call will not trigger a garbage collection.
688     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
689     DCHECK_NE(ref_.reg(), LR);
690     DCHECK_NE(ref_.reg(), WSP);
691     DCHECK_NE(ref_.reg(), WZR);
692     // IP0 is used internally by the ReadBarrierMarkRegX entry point
693     // as a temporary, it cannot be the entry point's input/output.
694     DCHECK_NE(ref_.reg(), IP0);
695     DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
696     // "Compact" slow path, saving two moves.
697     //
698     // Instead of using the standard runtime calling convention (input
699     // and output in W0):
700     //
701     //   W0 <- ref
702     //   W0 <- ReadBarrierMark(W0)
703     //   ref <- W0
704     //
705     // we just use rX (the register containing `ref`) as input and output
706     // of a dedicated entrypoint:
707     //
708     //   rX <- ReadBarrierMarkRegX(rX)
709     //
710     if (entrypoint_.IsValid()) {
711       arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
712       __ Blr(XRegisterFrom(entrypoint_));
713     } else {
714       // The entrypoint is not already loaded, so load it from the thread.
715       int32_t entry_point_offset =
716           CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
717       // This runtime call does not require a stack map.
718       arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
719     }
720   }
721 
722   // The location (register) of the marked object reference.
723   const Location ref_;
724 
725   // The location of the entrypoint if it is already loaded.
726   const Location entrypoint_;
727 
728  private:
729   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64);
730 };
731 
732 // Slow path marking an object reference `ref` during a read
733 // barrier. The field `obj.field` in the object `obj` holding this
734 // reference does not get updated by this slow path after marking.
735 //
736 // This means that after the execution of this slow path, `ref` will
737 // always be up-to-date, but `obj.field` may not; i.e., after the
738 // flip, `ref` will be a to-space reference, but `obj.field` will
739 // probably still be a from-space reference (unless it gets updated by
740 // another thread, or if another thread installed another object
741 // reference (different from `ref`) in `obj.field`).
742 //
743 // If `entrypoint` is a valid location it is assumed to already be
744 // holding the entrypoint. The case where the entrypoint is passed in
745 // is when the decision to mark is based on whether the GC is marking.
746 class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
747  public:
748   ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
749                                Location ref,
750                                Location entrypoint = Location::NoLocation())
751       : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) {
752     DCHECK(kEmitCompilerReadBarrier);
753   }
754 
755   const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }
756 
757   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
758     LocationSummary* locations = instruction_->GetLocations();
759     DCHECK(locations->CanCall());
760     DCHECK(ref_.IsRegister()) << ref_;
761     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
762     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
763         << "Unexpected instruction in read barrier marking slow path: "
764         << instruction_->DebugName();
765 
766     __ Bind(GetEntryLabel());
767     GenerateReadBarrierMarkRuntimeCall(codegen);
768     __ B(GetExitLabel());
769   }
770 
771  private:
772   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
773 };
774 
775 // Slow path loading `obj`'s lock word, loading a reference from
776 // object `*(obj + offset + (index << scale_factor))` into `ref`, and
777 // marking `ref` if `obj` is gray according to the lock word (Baker
778 // read barrier). The field `obj.field` in the object `obj` holding
779 // this reference does not get updated by this slow path after marking
780 // (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
781 // below for that).
782 //
783 // This means that after the execution of this slow path, `ref` will
784 // always be up-to-date, but `obj.field` may not; i.e., after the
785 // flip, `ref` will be a to-space reference, but `obj.field` will
786 // probably still be a from-space reference (unless it gets updated by
787 // another thread, or if another thread installed another object
788 // reference (different from `ref`) in `obj.field`).
789 //
790 // Argument `entrypoint` must be a register location holding the read
791 // barrier marking runtime entry point to be invoked.
792 class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
793  public:
794   LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction,
795                                                  Location ref,
796                                                  Register obj,
797                                                  uint32_t offset,
798                                                  Location index,
799                                                  size_t scale_factor,
800                                                  bool needs_null_check,
801                                                  bool use_load_acquire,
802                                                  Register temp,
803                                                  Location entrypoint)
804       : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
805         obj_(obj),
806         offset_(offset),
807         index_(index),
808         scale_factor_(scale_factor),
809         needs_null_check_(needs_null_check),
810         use_load_acquire_(use_load_acquire),
811         temp_(temp) {
812     DCHECK(kEmitCompilerReadBarrier);
813     DCHECK(kUseBakerReadBarrier);
814   }
815 
816   const char* GetDescription() const OVERRIDE {
817     return "LoadReferenceWithBakerReadBarrierSlowPathARM64";
818   }
819 
820   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
821     LocationSummary* locations = instruction_->GetLocations();
822     DCHECK(locations->CanCall());
823     DCHECK(ref_.IsRegister()) << ref_;
824     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
825     DCHECK(obj_.IsW());
826     DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
827     DCHECK(instruction_->IsInstanceFieldGet() ||
828            instruction_->IsStaticFieldGet() ||
829            instruction_->IsArrayGet() ||
830            instruction_->IsArraySet() ||
831            instruction_->IsInstanceOf() ||
832            instruction_->IsCheckCast() ||
833            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
834            (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
835         << "Unexpected instruction in read barrier marking slow path: "
836         << instruction_->DebugName();
837     // The read barrier instrumentation of object ArrayGet
838     // instructions does not support the HIntermediateAddress
839     // instruction.
840     DCHECK(!(instruction_->IsArrayGet() &&
841              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
842 
843     // Temporary register `temp_`, used to store the lock word, must
844     // not be IP0 nor IP1, as we may use them to emit the reference
845     // load (in the call to GenerateRawReferenceLoad below), and we
846     // need the lock word to still be in `temp_` after the reference
847     // load.
848     DCHECK_NE(LocationFrom(temp_).reg(), IP0);
849     DCHECK_NE(LocationFrom(temp_).reg(), IP1);
850 
851     __ Bind(GetEntryLabel());
852 
853     // When using MaybeGenerateReadBarrierSlow, the read barrier call is
854     // inserted after the original load. However, in fast path based
855     // Baker's read barriers, we need to perform the load of
856     // mirror::Object::monitor_ *before* the original reference load.
857     // This load-load ordering is required by the read barrier.
858     // The fast path/slow path (for Baker's algorithm) should look like:
859     //
860     //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
861     //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
862     //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
863     //   bool is_gray = (rb_state == ReadBarrier::GrayState());
864     //   if (is_gray) {
865     //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
866     //   }
867     //
868     // Note: the original implementation in ReadBarrier::Barrier is
869     // slightly more complex as it performs additional checks that we do
870     // not do here for performance reasons.
871 
872     // /* int32_t */ monitor = obj->monitor_
873     uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
874     __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
875     if (needs_null_check_) {
876       codegen->MaybeRecordImplicitNullCheck(instruction_);
877     }
878     // /* LockWord */ lock_word = LockWord(monitor)
879     static_assert(sizeof(LockWord) == sizeof(int32_t),
880                   "art::LockWord and int32_t have different sizes.");
881 
882     // Introduce a dependency on the lock_word including rb_state,
883     // to prevent load-load reordering, and without using
884     // a memory barrier (which would be more expensive).
885     // `obj` is unchanged by this operation, but its value now depends
886     // on `temp`.
887     __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
888 
889     // The actual reference load.
890     // A possible implicit null check has already been handled above.
891     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
892     arm64_codegen->GenerateRawReferenceLoad(instruction_,
893                                             ref_,
894                                             obj_,
895                                             offset_,
896                                             index_,
897                                             scale_factor_,
898                                             /* needs_null_check */ false,
899                                             use_load_acquire_);
900 
901     // Mark the object `ref` when `obj` is gray.
902     //
903     //   if (rb_state == ReadBarrier::GrayState())
904     //     ref = ReadBarrier::Mark(ref);
905     //
906     // Given the numeric representation, it's enough to check the low bit of the rb_state.
907     static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
908     static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
909     __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
910     GenerateReadBarrierMarkRuntimeCall(codegen);
911 
912     __ B(GetExitLabel());
913   }
914 
915  private:
916   // The register containing the object holding the marked object reference field.
917   Register obj_;
918   // The offset, index and scale factor to access the reference in `obj_`.
919   uint32_t offset_;
920   Location index_;
921   size_t scale_factor_;
922   // Is a null check required?
923   bool needs_null_check_;
924   // Should this reference load use Load-Acquire semantics?
925   bool use_load_acquire_;
926   // A temporary register used to hold the lock word of `obj_`.
927   Register temp_;
928 
929   DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64);
930 };
931 
932 // Slow path loading `obj`'s lock word, loading a reference from
933 // object `*(obj + offset + (index << scale_factor))` into `ref`, and
934 // marking `ref` if `obj` is gray according to the lock word (Baker
935 // read barrier). If needed, this slow path also atomically updates
936 // the field `obj.field` in the object `obj` holding this reference
937 // after marking (contrary to
938 // LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never
939 // tries to update `obj.field`).
940 //
941 // This means that after the execution of this slow path, both `ref`
942 // and `obj.field` will be up-to-date; i.e., after the flip, both will
943 // hold the same to-space reference (unless another thread installed
944 // another object reference (different from `ref`) in `obj.field`).
945 //
946 // Argument `entrypoint` must be a register location holding the read
947 // barrier marking runtime entry point to be invoked.
948 class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
949     : public ReadBarrierMarkSlowPathBaseARM64 {
950  public:
951   LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(HInstruction* instruction,
952                                                                Location ref,
953                                                                Register obj,
954                                                                uint32_t offset,
955                                                                Location index,
956                                                                size_t scale_factor,
957                                                                bool needs_null_check,
958                                                                bool use_load_acquire,
959                                                                Register temp,
960                                                                Location entrypoint)
961       : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
962         obj_(obj),
963         offset_(offset),
964         index_(index),
965         scale_factor_(scale_factor),
966         needs_null_check_(needs_null_check),
967         use_load_acquire_(use_load_acquire),
968         temp_(temp) {
969     DCHECK(kEmitCompilerReadBarrier);
970     DCHECK(kUseBakerReadBarrier);
971   }
972 
973   const char* GetDescription() const OVERRIDE {
974     return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64";
975   }
976 
977   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
978     LocationSummary* locations = instruction_->GetLocations();
979     Register ref_reg = WRegisterFrom(ref_);
980     DCHECK(locations->CanCall());
981     DCHECK(ref_.IsRegister()) << ref_;
982     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
983     DCHECK(obj_.IsW());
984     DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
985 
986     // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
987     DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
988         << "Unexpected instruction in read barrier marking and field updating slow path: "
989         << instruction_->DebugName();
990     DCHECK(instruction_->GetLocations()->Intrinsified());
991     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
992     DCHECK_EQ(offset_, 0u);
993     DCHECK_EQ(scale_factor_, 0u);
994     DCHECK_EQ(use_load_acquire_, false);
995     // The location of the offset of the marked reference field within `obj_`.
996     Location field_offset = index_;
997     DCHECK(field_offset.IsRegister()) << field_offset;
998 
999     // Temporary register `temp_`, used to store the lock word, must
1000     // not be IP0 nor IP1, as we may use them to emit the reference
1001     // load (in the call to GenerateRawReferenceLoad below), and we
1002     // need the lock word to still be in `temp_` after the reference
1003     // load.
1004     DCHECK_NE(LocationFrom(temp_).reg(), IP0);
1005     DCHECK_NE(LocationFrom(temp_).reg(), IP1);
1006 
1007     __ Bind(GetEntryLabel());
1008 
1009     // /* int32_t */ monitor = obj->monitor_
1010     uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1011     __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
1012     if (needs_null_check_) {
1013       codegen->MaybeRecordImplicitNullCheck(instruction_);
1014     }
1015     // /* LockWord */ lock_word = LockWord(monitor)
1016     static_assert(sizeof(LockWord) == sizeof(int32_t),
1017                   "art::LockWord and int32_t have different sizes.");
1018 
1019     // Introduce a dependency on the lock_word including rb_state,
1020     // to prevent load-load reordering, and without using
1021     // a memory barrier (which would be more expensive).
1022     // `obj` is unchanged by this operation, but its value now depends
1023     // on `temp`.
1024     __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
1025 
1026     // The actual reference load.
1027     // A possible implicit null check has already been handled above.
1028     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1029     arm64_codegen->GenerateRawReferenceLoad(instruction_,
1030                                             ref_,
1031                                             obj_,
1032                                             offset_,
1033                                             index_,
1034                                             scale_factor_,
1035                                             /* needs_null_check */ false,
1036                                             use_load_acquire_);
1037 
1038     // Mark the object `ref` when `obj` is gray.
1039     //
1040     //   if (rb_state == ReadBarrier::GrayState())
1041     //     ref = ReadBarrier::Mark(ref);
1042     //
1043     // Given the numeric representation, it's enough to check the low bit of the rb_state.
1044     static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
1045     static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1046     __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
1047 
1048     // Save the old value of the reference before marking it.
1049     // Note that we cannot use IP to save the old reference, as IP is
1050     // used internally by the ReadBarrierMarkRegX entry point, and we
1051     // need the old reference after the call to that entry point.
1052     DCHECK_NE(LocationFrom(temp_).reg(), IP0);
1053     __ Mov(temp_.W(), ref_reg);
1054 
1055     GenerateReadBarrierMarkRuntimeCall(codegen);
1056 
1057     // If the new reference is different from the old reference,
1058     // update the field in the holder (`*(obj_ + field_offset)`).
1059     //
1060     // Note that this field could also hold a different object, if
1061     // another thread had concurrently changed it. In that case, the
1062     // LDXR/CMP/BNE sequence of instructions in the compare-and-set
1063     // (CAS) operation below would abort the CAS, leaving the field
1064     // as-is.
1065     __ Cmp(temp_.W(), ref_reg);
1066     __ B(eq, GetExitLabel());
1067 
1068     // Update the holder's field atomically.  This may fail if the
1069     // mutator updates it before us, but that is OK.  This is achieved
1070     // using a strong compare-and-set (CAS) operation with relaxed
1071     // memory synchronization ordering, where the expected value is
1072     // the old reference and the desired value is the new reference.
1073 
1074     MacroAssembler* masm = arm64_codegen->GetVIXLAssembler();
1075     UseScratchRegisterScope temps(masm);
1076 
1077     // Convenience aliases.
1078     Register base = obj_.W();
1079     Register offset = XRegisterFrom(field_offset);
1080     Register expected = temp_.W();
1081     Register value = ref_reg;
1082     Register tmp_ptr = temps.AcquireX();    // Pointer to actual memory.
1083     Register tmp_value = temps.AcquireW();  // Value in memory.
1084 
1085     __ Add(tmp_ptr, base.X(), Operand(offset));
1086 
1087     if (kPoisonHeapReferences) {
1088       arm64_codegen->GetAssembler()->PoisonHeapReference(expected);
1089       if (value.Is(expected)) {
1090         // Do not poison `value`, as it is the same register as
1091         // `expected`, which has just been poisoned.
1092       } else {
1093         arm64_codegen->GetAssembler()->PoisonHeapReference(value);
1094       }
1095     }
1096 
1097     // do {
1098     //   tmp_value = [tmp_ptr] - expected;
1099     // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
1100 
1101     vixl::aarch64::Label loop_head, comparison_failed, exit_loop;
1102     __ Bind(&loop_head);
1103     __ Ldxr(tmp_value, MemOperand(tmp_ptr));
1104     __ Cmp(tmp_value, expected);
1105     __ B(&comparison_failed, ne);
1106     __ Stxr(tmp_value, value, MemOperand(tmp_ptr));
1107     __ Cbnz(tmp_value, &loop_head);
1108     __ B(&exit_loop);
1109     __ Bind(&comparison_failed);
1110     __ Clrex();
1111     __ Bind(&exit_loop);
1112 
1113     if (kPoisonHeapReferences) {
1114       arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected);
1115       if (value.Is(expected)) {
1116         // Do not unpoison `value`, as it is the same register as
1117         // `expected`, which has just been unpoisoned.
1118       } else {
1119         arm64_codegen->GetAssembler()->UnpoisonHeapReference(value);
1120       }
1121     }
1122 
1123     __ B(GetExitLabel());
1124   }
1125 
1126  private:
1127   // The register containing the object holding the marked object reference field.
1128   const Register obj_;
1129   // The offset, index and scale factor to access the reference in `obj_`.
1130   uint32_t offset_;
1131   Location index_;
1132   size_t scale_factor_;
1133   // Is a null check required?
1134   bool needs_null_check_;
1135   // Should this reference load use Load-Acquire semantics?
1136   bool use_load_acquire_;
1137   // A temporary register used to hold the lock word of `obj_`; and
1138   // also to hold the original reference value, when the reference is
1139   // marked.
1140   const Register temp_;
1141 
1142   DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64);
1143 };
1144 
1145 // Slow path generating a read barrier for a heap reference.
1146 class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
1147  public:
1148   ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
1149                                            Location out,
1150                                            Location ref,
1151                                            Location obj,
1152                                            uint32_t offset,
1153                                            Location index)
1154       : SlowPathCodeARM64(instruction),
1155         out_(out),
1156         ref_(ref),
1157         obj_(obj),
1158         offset_(offset),
1159         index_(index) {
1160     DCHECK(kEmitCompilerReadBarrier);
1161     // If `obj` is equal to `out` or `ref`, it means the initial object
1162     // has been overwritten by (or after) the heap object reference load
1163     // to be instrumented, e.g.:
1164     //
1165     //   __ Ldr(out, HeapOperand(out, class_offset));
1166     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
1167     //
1168     // In that case, we have lost the information about the original
1169     // object, and the emitted read barrier cannot work properly.
1170     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
1171     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
1172   }
1173 
1174   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
1175     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1176     LocationSummary* locations = instruction_->GetLocations();
1177     Primitive::Type type = Primitive::kPrimNot;
1178     DCHECK(locations->CanCall());
1179     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
1180     DCHECK(instruction_->IsInstanceFieldGet() ||
1181            instruction_->IsStaticFieldGet() ||
1182            instruction_->IsArrayGet() ||
1183            instruction_->IsInstanceOf() ||
1184            instruction_->IsCheckCast() ||
1185            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
1186         << "Unexpected instruction in read barrier for heap reference slow path: "
1187         << instruction_->DebugName();
1188     // The read barrier instrumentation of object ArrayGet
1189     // instructions does not support the HIntermediateAddress
1190     // instruction.
1191     DCHECK(!(instruction_->IsArrayGet() &&
1192              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
1193 
1194     __ Bind(GetEntryLabel());
1195 
1196     SaveLiveRegisters(codegen, locations);
1197 
1198     // We may have to change the index's value, but as `index_` is a
1199     // constant member (like other "inputs" of this slow path),
1200     // we introduce a copy of it, `index`.
1201     Location index = index_;
1202     if (index_.IsValid()) {
1203       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
1204       if (instruction_->IsArrayGet()) {
1205         // Compute the actual memory offset and store it in `index`.
1206         Register index_reg = RegisterFrom(index_, Primitive::kPrimInt);
1207         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
1208         if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
1209           // We are about to change the value of `index_reg` (see the
1210           // calls to vixl::MacroAssembler::Lsl and
1211           // vixl::MacroAssembler::Mov below), but it has
1212           // not been saved by the previous call to
1213           // art::SlowPathCode::SaveLiveRegisters, as it is a
1214           // callee-save register --
1215           // art::SlowPathCode::SaveLiveRegisters does not consider
1216           // callee-save registers, as it has been designed with the
1217           // assumption that callee-save registers are supposed to be
1218           // handled by the called function.  So, as a callee-save
1219           // register, `index_reg` _would_ eventually be saved onto
1220           // the stack, but it would be too late: we would have
1221           // changed its value earlier.  Therefore, we manually save
1222           // it here into another freely available register,
1223           // `free_reg`, chosen of course among the caller-save
1224           // registers (as a callee-save `free_reg` register would
1225           // exhibit the same problem).
1226           //
1227           // Note we could have requested a temporary register from
1228           // the register allocator instead; but we prefer not to, as
1229           // this is a slow path, and we know we can find a
1230           // caller-save register that is available.
1231           Register free_reg = FindAvailableCallerSaveRegister(codegen);
1232           __ Mov(free_reg.W(), index_reg);
1233           index_reg = free_reg;
1234           index = LocationFrom(index_reg);
1235         } else {
1236           // The initial register stored in `index_` has already been
1237           // saved in the call to art::SlowPathCode::SaveLiveRegisters
1238           // (as it is not a callee-save register), so we can freely
1239           // use it.
1240         }
1241         // Shifting the index value contained in `index_reg` by the scale
1242         // factor (2) cannot overflow in practice, as the runtime is
1243         // unable to allocate object arrays with a size larger than
1244         // 2^26 - 1 (that is, 2^28 - 4 bytes).
1245         __ Lsl(index_reg, index_reg, Primitive::ComponentSizeShift(type));
1246         static_assert(
1247             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
1248             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
1249         __ Add(index_reg, index_reg, Operand(offset_));
1250       } else {
1251         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
1252         // intrinsics, `index_` is not shifted by a scale factor of 2
1253         // (as in the case of ArrayGet), as it is actually an offset
1254         // to an object field within an object.
1255         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
1256         DCHECK(instruction_->GetLocations()->Intrinsified());
1257         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
1258                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
1259             << instruction_->AsInvoke()->GetIntrinsic();
1260         DCHECK_EQ(offset_, 0u);
1261         DCHECK(index_.IsRegister());
1262       }
1263     }
1264 
1265     // We're moving two or three locations to locations that could
1266     // overlap, so we need a parallel move resolver.
1267     InvokeRuntimeCallingConvention calling_convention;
1268     HParallelMove parallel_move(codegen->GetGraph()->GetArena());
1269     parallel_move.AddMove(ref_,
1270                           LocationFrom(calling_convention.GetRegisterAt(0)),
1271                           type,
1272                           nullptr);
1273     parallel_move.AddMove(obj_,
1274                           LocationFrom(calling_convention.GetRegisterAt(1)),
1275                           type,
1276                           nullptr);
1277     if (index.IsValid()) {
1278       parallel_move.AddMove(index,
1279                             LocationFrom(calling_convention.GetRegisterAt(2)),
1280                             Primitive::kPrimInt,
1281                             nullptr);
1282       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
1283     } else {
1284       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
1285       arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
1286     }
1287     arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
1288                                  instruction_,
1289                                  instruction_->GetDexPc(),
1290                                  this);
1291     CheckEntrypointTypes<
1292         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
1293     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
1294 
1295     RestoreLiveRegisters(codegen, locations);
1296 
1297     __ B(GetExitLabel());
1298   }
1299 
1300   const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
1301 
1302  private:
1303   Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
1304     size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
1305     size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
1306     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
1307       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
1308         return Register(VIXLRegCodeFromART(i), kXRegSize);
1309       }
1310     }
1311     // We shall never fail to find a free caller-save register, as
1312     // there are more than two core caller-save registers on ARM64
1313     // (meaning it is possible to find one which is different from
1314     // `ref` and `obj`).
1315     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
1316     LOG(FATAL) << "Could not find a free register";
1317     UNREACHABLE();
1318   }
1319 
1320   const Location out_;
1321   const Location ref_;
1322   const Location obj_;
1323   const uint32_t offset_;
1324   // An additional location containing an index to an array.
1325   // Only used for HArrayGet and the UnsafeGetObject &
1326   // UnsafeGetObjectVolatile intrinsics.
1327   const Location index_;
1328 
1329   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
1330 };
1331 
1332 // Slow path generating a read barrier for a GC root.
1333 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
1334  public:
1335   ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
1336       : SlowPathCodeARM64(instruction), out_(out), root_(root) {
1337     DCHECK(kEmitCompilerReadBarrier);
1338   }
1339 
1340   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
1341     LocationSummary* locations = instruction_->GetLocations();
1342     Primitive::Type type = Primitive::kPrimNot;
1343     DCHECK(locations->CanCall());
1344     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
1345     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
1346         << "Unexpected instruction in read barrier for GC root slow path: "
1347         << instruction_->DebugName();
1348 
1349     __ Bind(GetEntryLabel());
1350     SaveLiveRegisters(codegen, locations);
1351 
1352     InvokeRuntimeCallingConvention calling_convention;
1353     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1354     // The argument of the ReadBarrierForRootSlow is not a managed
1355     // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
1356     // thus we need a 64-bit move here, and we cannot use
1357     //
1358     //   arm64_codegen->MoveLocation(
1359     //       LocationFrom(calling_convention.GetRegisterAt(0)),
1360     //       root_,
1361     //       type);
1362     //
1363     // which would emit a 32-bit move, as `type` is a (32-bit wide)
1364     // reference type (`Primitive::kPrimNot`).
1365     __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
1366     arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
1367                                  instruction_,
1368                                  instruction_->GetDexPc(),
1369                                  this);
1370     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
1371     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
1372 
1373     RestoreLiveRegisters(codegen, locations);
1374     __ B(GetExitLabel());
1375   }
1376 
1377   const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; }
1378 
1379  private:
1380   const Location out_;
1381   const Location root_;
1382 
1383   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
1384 };
1385 
1386 #undef __
1387 
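// Computes the location of the next argument under the managed calling
// convention: floating-point arguments consume FP argument registers, all
// other arguments consume core argument registers, and anything left over
// goes on the stack.  Note that stack space is reserved for every argument,
// even those passed in registers; for example, a (float, int, long) argument
// list takes the first FP register and the first two core argument registers
// while still advancing the stack index by 1, 1 and 2 slots respectively.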
1388 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
1389   Location next_location;
1390   if (type == Primitive::kPrimVoid) {
1391     LOG(FATAL) << "Unreachable type " << type;
1392   }
1393 
1394   if (Primitive::IsFloatingPointType(type) &&
1395       (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
1396     next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
1397   } else if (!Primitive::IsFloatingPointType(type) &&
1398              (gp_index_ < calling_convention.GetNumberOfRegisters())) {
1399     next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
1400   } else {
1401     size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
1402     next_location = Primitive::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
1403                                                  : Location::StackSlot(stack_offset);
1404   }
1405 
1406   // Space on the stack is reserved for all arguments.
1407   stack_index_ += Primitive::Is64BitType(type) ? 2 : 1;
1408   return next_location;
1409 }
1410 
1411 Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
1412   return LocationFrom(kArtMethodRegister);
1413 }
1414 
1415 CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
1416                                        const Arm64InstructionSetFeatures& isa_features,
1417                                        const CompilerOptions& compiler_options,
1418                                        OptimizingCompilerStats* stats)
1419     : CodeGenerator(graph,
1420                     kNumberOfAllocatableRegisters,
1421                     kNumberOfAllocatableFPRegisters,
1422                     kNumberOfAllocatableRegisterPairs,
1423                     callee_saved_core_registers.GetList(),
1424                     callee_saved_fp_registers.GetList(),
1425                     compiler_options,
1426                     stats),
1427       block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1428       jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1429       location_builder_(graph, this),
1430       instruction_visitor_(graph, this),
1431       move_resolver_(graph->GetArena(), this),
1432       assembler_(graph->GetArena()),
1433       isa_features_(isa_features),
1434       uint32_literals_(std::less<uint32_t>(),
1435                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1436       uint64_literals_(std::less<uint64_t>(),
1437                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1438       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1439       boot_image_string_patches_(StringReferenceValueComparator(),
1440                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1441       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1442       boot_image_type_patches_(TypeReferenceValueComparator(),
1443                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1444       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1445       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1446       baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1447       jit_string_patches_(StringReferenceValueComparator(),
1448                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1449       jit_class_patches_(TypeReferenceValueComparator(),
1450                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
1451   // Save the link register (containing the return address) to mimic Quick.
1452   AddAllocatedRegister(LocationFrom(lr));
1453 }
1454 
1455 #define __ GetVIXLAssembler()->
1456 
1457 void CodeGeneratorARM64::EmitJumpTables() {
1458   for (auto&& jump_table : jump_tables_) {
1459     jump_table->EmitTable(this);
1460   }
1461 }
1462 
1463 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
1464   EmitJumpTables();
1465   // Ensure we emit the literal pool.
1466   __ FinalizeCode();
1467 
1468   CodeGenerator::Finalize(allocator);
1469 }
1470 
1471 void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
1472   // Note: There are 6 kinds of moves:
1473   // 1. constant -> GPR/FPR (non-cycle)
1474   // 2. constant -> stack (non-cycle)
1475   // 3. GPR/FPR -> GPR/FPR
1476   // 4. GPR/FPR -> stack
1477   // 5. stack -> GPR/FPR
1478   // 6. stack -> stack (non-cycle)
1479   // Cases 1, 2 and 6 should never be included in a dependency cycle on ARM64. For cases 3, 4
1480   // and 5, VIXL uses at most 1 GPR per move. VIXL has 2 GPR temps and 1 FPR temp, and there
1481   // should be no intersecting cycles on ARM64, so we always have at least 1 GPR and 1 FPR VIXL
1482   // temp available to resolve the dependency.
1483   vixl_temps_.Open(GetVIXLAssembler());
1484 }
1485 
1486 void ParallelMoveResolverARM64::FinishEmitNativeCode() {
1487   vixl_temps_.Close();
1488 }
1489 
1490 Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
1491   DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister
1492          || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot
1493          || kind == Location::kSIMDStackSlot);
1494   kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot)
1495       ? Location::kFpuRegister
1496       : Location::kRegister;
1497   Location scratch = GetScratchLocation(kind);
1498   if (!scratch.Equals(Location::NoLocation())) {
1499     return scratch;
1500   }
1501   // Allocate from VIXL temp registers.
1502   if (kind == Location::kRegister) {
1503     scratch = LocationFrom(vixl_temps_.AcquireX());
1504   } else {
1505     DCHECK(kind == Location::kFpuRegister);
1506     scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
1507         ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
1508         : vixl_temps_.AcquireD());
1509   }
1510   AddScratchLocation(scratch);
1511   return scratch;
1512 }
1513 
1514 void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
1515   if (loc.IsRegister()) {
1516     vixl_temps_.Release(XRegisterFrom(loc));
1517   } else {
1518     DCHECK(loc.IsFpuRegister());
1519     vixl_temps_.Release(codegen_->GetGraph()->HasSIMD() ? QRegisterFrom(loc) : DRegisterFrom(loc));
1520   }
1521   RemoveScratchLocation(loc);
1522 }
1523 
1524 void ParallelMoveResolverARM64::EmitMove(size_t index) {
1525   MoveOperands* move = moves_[index];
1526   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), Primitive::kPrimVoid);
1527 }
1528 
1529 void CodeGeneratorARM64::GenerateFrameEntry() {
1530   MacroAssembler* masm = GetVIXLAssembler();
1531   __ Bind(&frame_entry_label_);
1532 
1533   bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kArm64) || !IsLeafMethod();
1534   if (do_overflow_check) {
1535     UseScratchRegisterScope temps(masm);
1536     Register temp = temps.AcquireX();
1537     DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1538     __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
1539     {
1540       // Ensure that between load and RecordPcInfo there are no pools emitted.
1541       ExactAssemblyScope eas(GetVIXLAssembler(),
1542                              kInstructionSize,
1543                              CodeBufferCheckScope::kExactSize);
1544       __ ldr(wzr, MemOperand(temp, 0));
1545       RecordPcInfo(nullptr, 0);
1546     }
1547   }
1548 
1549   if (!HasEmptyFrame()) {
1550     int frame_size = GetFrameSize();
1551     // Stack layout:
1552     //      sp[frame_size - 8]        : lr.
1553     //      ...                       : other preserved core registers.
1554     //      ...                       : other preserved fp registers.
1555     //      ...                       : reserved frame space.
1556     //      sp[0]                     : current method.
1557 
1558     // Save the current method if we need it. Note that we do not
1559     // do this in HCurrentMethod, as the instruction might have been removed
1560     // in the SSA graph.
1561     if (RequiresCurrentMethod()) {
1562       __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
1563     } else {
1564       __ Claim(frame_size);
1565     }
1566     GetAssembler()->cfi().AdjustCFAOffset(frame_size);
1567     GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(),
1568         frame_size - GetCoreSpillSize());
1569     GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(),
1570         frame_size - FrameEntrySpillSize());
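    // As an illustration only (assuming, say, a 64-byte frame whose only
    // spilled registers are x20 and lr, with no FP spills), the code above
    // would expand to roughly:
    //   str x0, [sp, #-64]!     // Store ArtMethod* and claim the frame.
    //   stp x20, lr, [sp, #48]  // Spill the preserved core registers.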
1571 
1572     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1573       // Initialize should_deoptimize flag to 0.
1574       Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
1575       __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
1576     }
1577   }
1578 }
1579 
1580 void CodeGeneratorARM64::GenerateFrameExit() {
1581   GetAssembler()->cfi().RememberState();
1582   if (!HasEmptyFrame()) {
1583     int frame_size = GetFrameSize();
1584     GetAssembler()->UnspillRegisters(GetFramePreservedFPRegisters(),
1585         frame_size - FrameEntrySpillSize());
1586     GetAssembler()->UnspillRegisters(GetFramePreservedCoreRegisters(),
1587         frame_size - GetCoreSpillSize());
1588     __ Drop(frame_size);
1589     GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
1590   }
1591   __ Ret();
1592   GetAssembler()->cfi().RestoreState();
1593   GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
1594 }
1595 
1596 CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
1597   DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
1598   return CPURegList(CPURegister::kRegister, kXRegSize,
1599                     core_spill_mask_);
1600 }
1601 
1602 CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
1603   DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
1604                                          GetNumberOfFloatingPointRegisters()));
1605   return CPURegList(CPURegister::kFPRegister, kDRegSize,
1606                     fpu_spill_mask_);
1607 }
1608 
1609 void CodeGeneratorARM64::Bind(HBasicBlock* block) {
1610   __ Bind(GetLabelOf(block));
1611 }
1612 
1613 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
1614   DCHECK(location.IsRegister());
1615   __ Mov(RegisterFrom(location, Primitive::kPrimInt), value);
1616 }
1617 
1618 void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1619   if (location.IsRegister()) {
1620     locations->AddTemp(location);
1621   } else {
1622     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1623   }
1624 }
1625 
1626 void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
1627   UseScratchRegisterScope temps(GetVIXLAssembler());
1628   Register card = temps.AcquireX();
1629   Register temp = temps.AcquireW();   // Index within the CardTable - 32bit.
1630   vixl::aarch64::Label done;
1631   if (value_can_be_null) {
1632     __ Cbz(value, &done);
1633   }
1634   __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
1635   __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
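  // Note the dual use of `card`: it provides the card table base for the
  // address computation below and also supplies the byte value stored to
  // mark the card dirty, which avoids materializing a separate constant.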
1636   __ Strb(card, MemOperand(card, temp.X()));
1637   if (value_can_be_null) {
1638     __ Bind(&done);
1639   }
1640 }
1641 
1642 void CodeGeneratorARM64::SetupBlockedRegisters() const {
1643   // Blocked core registers:
1644   //      lr        : Runtime reserved.
1645   //      tr        : Runtime reserved.
1646   //      xSuspend  : Runtime reserved. TODO: Unblock this when the runtime stops using it.
1647   //      ip1       : VIXL core temp.
1648   //      ip0       : VIXL core temp.
1649   //
1650   // Blocked fp registers:
1651   //      d31       : VIXL fp temp.
1652   CPURegList reserved_core_registers = vixl_reserved_core_registers;
1653   reserved_core_registers.Combine(runtime_reserved_core_registers);
1654   while (!reserved_core_registers.IsEmpty()) {
1655     blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
1656   }
1657 
1658   CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
1659   while (!reserved_fp_registers.IsEmpty()) {
1660     blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
1661   }
1662 
1663   if (GetGraph()->IsDebuggable()) {
1664     // Stubs do not save callee-save floating point registers. If the graph
1665     // is debuggable, we need to deal with these registers differently. For
1666     // now, just block them.
1667     CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
1668     while (!reserved_fp_registers_debuggable.IsEmpty()) {
1669       blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
1670     }
1671   }
1672 }
1673 
1674 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1675   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1676   __ Str(reg, MemOperand(sp, stack_index));
1677   return kArm64WordSize;
1678 }
1679 
1680 size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1681   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1682   __ Ldr(reg, MemOperand(sp, stack_index));
1683   return kArm64WordSize;
1684 }
1685 
1686 size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1687   FPRegister reg = FPRegister(reg_id, kDRegSize);
1688   __ Str(reg, MemOperand(sp, stack_index));
1689   return kArm64WordSize;
1690 }
1691 
1692 size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1693   FPRegister reg = FPRegister(reg_id, kDRegSize);
1694   __ Ldr(reg, MemOperand(sp, stack_index));
1695   return kArm64WordSize;
1696 }
1697 
1698 void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
1699   stream << XRegister(reg);
1700 }
1701 
1702 void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1703   stream << DRegister(reg);
1704 }
1705 
1706 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
1707   if (constant->IsIntConstant()) {
1708     __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
1709   } else if (constant->IsLongConstant()) {
1710     __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
1711   } else if (constant->IsNullConstant()) {
1712     __ Mov(Register(destination), 0);
1713   } else if (constant->IsFloatConstant()) {
1714     __ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue());
1715   } else {
1716     DCHECK(constant->IsDoubleConstant());
1717     __ Fmov(FPRegister(destination), constant->AsDoubleConstant()->GetValue());
1718   }
1719 }
1720 
1721 
1722 static bool CoherentConstantAndType(Location constant, Primitive::Type type) {
1723   DCHECK(constant.IsConstant());
1724   HConstant* cst = constant.GetConstant();
1725   return (cst->IsIntConstant() && type == Primitive::kPrimInt) ||
1726          // Null is mapped to a core W register, which we associate with kPrimInt.
1727          (cst->IsNullConstant() && type == Primitive::kPrimInt) ||
1728          (cst->IsLongConstant() && type == Primitive::kPrimLong) ||
1729          (cst->IsFloatConstant() && type == Primitive::kPrimFloat) ||
1730          (cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
1731 }
1732 
1733 // Allocate a scratch register from the VIXL pool, querying first into
1734 // the floating-point register pool, and then the core register
1735 // pool.  This is essentially a reimplementation of
1736 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
1737 // using a different allocation strategy.
1738 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
1739                                                     vixl::aarch64::UseScratchRegisterScope* temps,
1740                                                     int size_in_bits) {
1741   return masm->GetScratchFPRegisterList()->IsEmpty()
1742       ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
1743       : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
1744 }
1745 
1746 void CodeGeneratorARM64::MoveLocation(Location destination,
1747                                       Location source,
1748                                       Primitive::Type dst_type) {
1749   if (source.Equals(destination)) {
1750     return;
1751   }
1752 
1753   // A valid move can always be inferred from the destination and source
1754   // locations. When moving from and to a register, the argument type can be
1755   // used to generate 32bit instead of 64bit moves. In debug mode we also
1756   // check the coherency of the locations and the type.
1757   bool unspecified_type = (dst_type == Primitive::kPrimVoid);
1758 
1759   if (destination.IsRegister() || destination.IsFpuRegister()) {
1760     if (unspecified_type) {
1761       HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
1762       if (source.IsStackSlot() ||
1763           (src_cst != nullptr && (src_cst->IsIntConstant()
1764                                   || src_cst->IsFloatConstant()
1765                                   || src_cst->IsNullConstant()))) {
1766         // For stack slots and 32bit constants, a 32bit type is appropriate.
1767         dst_type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat;
1768       } else {
1769         // If the source is a double stack slot or a 64bit constant, a 64bit
1770         // type is appropriate. Else the source is a register, and since the
1771         // type has not been specified, we choose a 64bit type to force a 64bit
1772         // move.
1773         dst_type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble;
1774       }
1775     }
1776     DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(dst_type)) ||
1777            (destination.IsRegister() && !Primitive::IsFloatingPointType(dst_type)));
1778     CPURegister dst = CPURegisterFrom(destination, dst_type);
1779     if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
1780       DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
1781       __ Ldr(dst, StackOperandFrom(source));
1782     } else if (source.IsSIMDStackSlot()) {
1783       __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
1784     } else if (source.IsConstant()) {
1785       DCHECK(CoherentConstantAndType(source, dst_type));
1786       MoveConstant(dst, source.GetConstant());
1787     } else if (source.IsRegister()) {
1788       if (destination.IsRegister()) {
1789         __ Mov(Register(dst), RegisterFrom(source, dst_type));
1790       } else {
1791         DCHECK(destination.IsFpuRegister());
1792         Primitive::Type source_type = Primitive::Is64BitType(dst_type)
1793             ? Primitive::kPrimLong
1794             : Primitive::kPrimInt;
1795         __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
1796       }
1797     } else {
1798       DCHECK(source.IsFpuRegister());
1799       if (destination.IsRegister()) {
1800         Primitive::Type source_type = Primitive::Is64BitType(dst_type)
1801             ? Primitive::kPrimDouble
1802             : Primitive::kPrimFloat;
1803         __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
1804       } else {
1805         DCHECK(destination.IsFpuRegister());
1806         if (GetGraph()->HasSIMD()) {
1807           __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
1808         } else {
1809           __ Fmov(FPRegister(dst), FPRegisterFrom(source, dst_type));
1810         }
1811       }
1812     }
1813   } else if (destination.IsSIMDStackSlot()) {
1814     if (source.IsFpuRegister()) {
1815       __ Str(QRegisterFrom(source), StackOperandFrom(destination));
1816     } else {
1817       DCHECK(source.IsSIMDStackSlot());
1818       UseScratchRegisterScope temps(GetVIXLAssembler());
1819       if (GetVIXLAssembler()->GetScratchFPRegisterList()->IsEmpty()) {
1820         Register temp = temps.AcquireX();
1821         __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
1822         __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
1823         __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
1824         __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
1825       } else {
1826         FPRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
1827         __ Ldr(temp, StackOperandFrom(source));
1828         __ Str(temp, StackOperandFrom(destination));
1829       }
1830     }
1831   } else {  // The destination is not a register. It must be a stack slot.
1832     DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
1833     if (source.IsRegister() || source.IsFpuRegister()) {
1834       if (unspecified_type) {
1835         if (source.IsRegister()) {
1836           dst_type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong;
1837         } else {
1838           dst_type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble;
1839         }
1840       }
1841       DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(dst_type)) &&
1842              (source.IsFpuRegister() == Primitive::IsFloatingPointType(dst_type)));
1843       __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination));
1844     } else if (source.IsConstant()) {
1845       DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type))
1846           << source << " " << dst_type;
1847       UseScratchRegisterScope temps(GetVIXLAssembler());
1848       HConstant* src_cst = source.GetConstant();
1849       CPURegister temp;
1850       if (src_cst->IsZeroBitPattern()) {
1851         temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant())
1852             ? Register(xzr)
1853             : Register(wzr);
1854       } else {
1855         if (src_cst->IsIntConstant()) {
1856           temp = temps.AcquireW();
1857         } else if (src_cst->IsLongConstant()) {
1858           temp = temps.AcquireX();
1859         } else if (src_cst->IsFloatConstant()) {
1860           temp = temps.AcquireS();
1861         } else {
1862           DCHECK(src_cst->IsDoubleConstant());
1863           temp = temps.AcquireD();
1864         }
1865         MoveConstant(temp, src_cst);
1866       }
1867       __ Str(temp, StackOperandFrom(destination));
1868     } else {
1869       DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
1870       DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
1871       UseScratchRegisterScope temps(GetVIXLAssembler());
1872       // Use any scratch register (a core or a floating-point one)
1873       // from VIXL scratch register pools as a temporary.
1874       //
1875       // We used to only use the FP scratch register pool, but in some
1876       // rare cases the only register from this pool (D31) would
1877       // already be used (e.g. within a ParallelMove instruction, when
1878       // a move is blocked by another move requiring a scratch FP
1879       // register, which would reserve D31). To prevent this issue, we
1880       // ask for a scratch register of any type (core or FP).
1881       //
1882       // Also, we ask for an FP scratch register first, as the demand
1883       // for scratch core registers is higher.  This is why we
1884       // use AcquireFPOrCoreCPURegisterOfSize instead of
1885       // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
1886       // allocates core scratch registers first.
1887       CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
1888           GetVIXLAssembler(),
1889           &temps,
1890           (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
1891       __ Ldr(temp, StackOperandFrom(source));
1892       __ Str(temp, StackOperandFrom(destination));
1893     }
1894   }
1895 }
1896 
1897 void CodeGeneratorARM64::Load(Primitive::Type type,
1898                               CPURegister dst,
1899                               const MemOperand& src) {
1900   switch (type) {
1901     case Primitive::kPrimBoolean:
1902       __ Ldrb(Register(dst), src);
1903       break;
1904     case Primitive::kPrimByte:
1905       __ Ldrsb(Register(dst), src);
1906       break;
1907     case Primitive::kPrimShort:
1908       __ Ldrsh(Register(dst), src);
1909       break;
1910     case Primitive::kPrimChar:
1911       __ Ldrh(Register(dst), src);
1912       break;
1913     case Primitive::kPrimInt:
1914     case Primitive::kPrimNot:
1915     case Primitive::kPrimLong:
1916     case Primitive::kPrimFloat:
1917     case Primitive::kPrimDouble:
1918       DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
1919       __ Ldr(dst, src);
1920       break;
1921     case Primitive::kPrimVoid:
1922       LOG(FATAL) << "Unreachable type " << type;
1923   }
1924 }
1925 
1926 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
1927                                      CPURegister dst,
1928                                      const MemOperand& src,
1929                                      bool needs_null_check) {
1930   MacroAssembler* masm = GetVIXLAssembler();
1931   UseScratchRegisterScope temps(masm);
1932   Register temp_base = temps.AcquireX();
1933   Primitive::Type type = instruction->GetType();
1934 
1935   DCHECK(!src.IsPreIndex());
1936   DCHECK(!src.IsPostIndex());
1937 
1938   // TODO(vixl): Let the MacroAssembler handle MemOperand.
1939   __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
1940   {
1941     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
1942     MemOperand base = MemOperand(temp_base);
1943     switch (type) {
1944       case Primitive::kPrimBoolean:
1945         {
1946           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1947           __ ldarb(Register(dst), base);
1948           if (needs_null_check) {
1949             MaybeRecordImplicitNullCheck(instruction);
1950           }
1951         }
1952         break;
1953       case Primitive::kPrimByte:
1954         {
1955           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1956           __ ldarb(Register(dst), base);
1957           if (needs_null_check) {
1958             MaybeRecordImplicitNullCheck(instruction);
1959           }
1960         }
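        // There is no sign-extending load-acquire instruction, so load the
        // value zero-extended and sign-extend it manually (the kPrimShort
        // case below does the same).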
1961         __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
1962         break;
1963       case Primitive::kPrimChar:
1964         {
1965           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1966           __ ldarh(Register(dst), base);
1967           if (needs_null_check) {
1968             MaybeRecordImplicitNullCheck(instruction);
1969           }
1970         }
1971         break;
1972       case Primitive::kPrimShort:
1973         {
1974           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1975           __ ldarh(Register(dst), base);
1976           if (needs_null_check) {
1977             MaybeRecordImplicitNullCheck(instruction);
1978           }
1979         }
1980         __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
1981         break;
1982       case Primitive::kPrimInt:
1983       case Primitive::kPrimNot:
1984       case Primitive::kPrimLong:
1985         DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
1986         {
1987           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1988           __ ldar(Register(dst), base);
1989           if (needs_null_check) {
1990             MaybeRecordImplicitNullCheck(instruction);
1991           }
1992         }
1993         break;
1994       case Primitive::kPrimFloat:
1995       case Primitive::kPrimDouble: {
1996         DCHECK(dst.IsFPRegister());
1997         DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
1998 
1999         Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
2000         {
2001           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2002           __ ldar(temp, base);
2003           if (needs_null_check) {
2004             MaybeRecordImplicitNullCheck(instruction);
2005           }
2006         }
2007         __ Fmov(FPRegister(dst), temp);
2008         break;
2009       }
2010       case Primitive::kPrimVoid:
2011         LOG(FATAL) << "Unreachable type " << type;
2012     }
2013   }
2014 }
2015 
2016 void CodeGeneratorARM64::Store(Primitive::Type type,
2017                                CPURegister src,
2018                                const MemOperand& dst) {
2019   switch (type) {
2020     case Primitive::kPrimBoolean:
2021     case Primitive::kPrimByte:
2022       __ Strb(Register(src), dst);
2023       break;
2024     case Primitive::kPrimChar:
2025     case Primitive::kPrimShort:
2026       __ Strh(Register(src), dst);
2027       break;
2028     case Primitive::kPrimInt:
2029     case Primitive::kPrimNot:
2030     case Primitive::kPrimLong:
2031     case Primitive::kPrimFloat:
2032     case Primitive::kPrimDouble:
2033       DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));
2034       __ Str(src, dst);
2035       break;
2036     case Primitive::kPrimVoid:
2037       LOG(FATAL) << "Unreachable type " << type;
2038   }
2039 }
2040 
2041 void CodeGeneratorARM64::StoreRelease(HInstruction* instruction,
2042                                       Primitive::Type type,
2043                                       CPURegister src,
2044                                       const MemOperand& dst,
2045                                       bool needs_null_check) {
2046   MacroAssembler* masm = GetVIXLAssembler();
2047   UseScratchRegisterScope temps(GetVIXLAssembler());
2048   Register temp_base = temps.AcquireX();
2049 
2050   DCHECK(!dst.IsPreIndex());
2051   DCHECK(!dst.IsPostIndex());
2052 
2053   // TODO(vixl): Let the MacroAssembler handle this.
2054   Operand op = OperandFromMemOperand(dst);
2055   __ Add(temp_base, dst.GetBaseRegister(), op);
2056   MemOperand base = MemOperand(temp_base);
2057   // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2058   switch (type) {
2059     case Primitive::kPrimBoolean:
2060     case Primitive::kPrimByte:
2061       {
2062         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2063         __ stlrb(Register(src), base);
2064         if (needs_null_check) {
2065           MaybeRecordImplicitNullCheck(instruction);
2066         }
2067       }
2068       break;
2069     case Primitive::kPrimChar:
2070     case Primitive::kPrimShort:
2071       {
2072         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2073         __ stlrh(Register(src), base);
2074         if (needs_null_check) {
2075           MaybeRecordImplicitNullCheck(instruction);
2076         }
2077       }
2078       break;
2079     case Primitive::kPrimInt:
2080     case Primitive::kPrimNot:
2081     case Primitive::kPrimLong:
2082       DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));
2083       {
2084         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2085         __ stlr(Register(src), base);
2086         if (needs_null_check) {
2087           MaybeRecordImplicitNullCheck(instruction);
2088         }
2089       }
2090       break;
2091     case Primitive::kPrimFloat:
2092     case Primitive::kPrimDouble: {
2093       DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));
2094       Register temp_src;
2095       if (src.IsZero()) {
2096         // The zero register is used to avoid synthesizing zero constants.
2097         temp_src = Register(src);
2098       } else {
2099         DCHECK(src.IsFPRegister());
2100         temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
2101         __ Fmov(temp_src, FPRegister(src));
2102       }
2103       {
2104         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2105         __ stlr(temp_src, base);
2106         if (needs_null_check) {
2107           MaybeRecordImplicitNullCheck(instruction);
2108         }
2109       }
2110       break;
2111     }
2112     case Primitive::kPrimVoid:
2113       LOG(FATAL) << "Unreachable type " << type;
2114   }
2115 }
2116 
2117 void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
2118                                        HInstruction* instruction,
2119                                        uint32_t dex_pc,
2120                                        SlowPathCode* slow_path) {
2121   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
2122 
2123   __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value()));
2124   {
2125     // Ensure the pc position is recorded immediately after the `blr` instruction.
2126     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
2127     __ blr(lr);
2128     if (EntrypointRequiresStackMap(entrypoint)) {
2129       RecordPcInfo(instruction, dex_pc, slow_path);
2130     }
2131   }
2132 }
2133 
2134 void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
2135                                                              HInstruction* instruction,
2136                                                              SlowPathCode* slow_path) {
2137   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
2138   __ Ldr(lr, MemOperand(tr, entry_point_offset));
2139   __ Blr(lr);
2140 }
2141 
2142 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
2143                                                                      Register class_reg) {
2144   UseScratchRegisterScope temps(GetVIXLAssembler());
2145   Register temp = temps.AcquireW();
2146   size_t status_offset = mirror::Class::StatusOffset().SizeValue();
2147 
2148   // Even if the initialized flag is set, we need to ensure consistent memory ordering.
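  // (The acquire load of the status is assumed to pair with a release store
  // performed by the thread that marked the class as initialized, making its
  // initialization writes visible to us.)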
2149   // TODO(vixl): Let the MacroAssembler handle MemOperand.
2150   __ Add(temp, class_reg, status_offset);
2151   __ Ldar(temp, HeapOperand(temp));
2152   __ Cmp(temp, mirror::Class::kStatusInitialized);
2153   __ B(lt, slow_path->GetEntryLabel());
2154   __ Bind(slow_path->GetExitLabel());
2155 }
2156 
2157 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
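  // The barrier kinds map to DMB variants (all inner shareable):
  //   kAnyAny, kAnyStore -> dmb ish   (full barrier)
  //   kLoadAny           -> dmb ishld (orders loads before later accesses)
  //   kStoreStore        -> dmb ishst (orders stores before later stores)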
2158   BarrierType type = BarrierAll;
2159 
2160   switch (kind) {
2161     case MemBarrierKind::kAnyAny:
2162     case MemBarrierKind::kAnyStore: {
2163       type = BarrierAll;
2164       break;
2165     }
2166     case MemBarrierKind::kLoadAny: {
2167       type = BarrierReads;
2168       break;
2169     }
2170     case MemBarrierKind::kStoreStore: {
2171       type = BarrierWrites;
2172       break;
2173     }
2174     default:
2175       LOG(FATAL) << "Unexpected memory barrier " << kind;
2176   }
2177   __ Dmb(InnerShareable, type);
2178 }
2179 
2180 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
2181                                                          HBasicBlock* successor) {
2182   SuspendCheckSlowPathARM64* slow_path =
2183       down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
2184   if (slow_path == nullptr) {
2185     slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor);
2186     instruction->SetSlowPath(slow_path);
2187     codegen_->AddSlowPath(slow_path);
2188     if (successor != nullptr) {
2189       DCHECK(successor->IsLoopHeader());
2190       codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
2191     }
2192   } else {
2193     DCHECK_EQ(slow_path->GetSuccessor(), successor);
2194   }
2195 
2196   UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
2197   Register temp = temps.AcquireW();
2198 
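  // The loaded 16-bit thread flags are non-zero when a suspend or checkpoint
  // request is pending; in that case we branch to the slow path.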
2199   __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
2200   if (successor == nullptr) {
2201     __ Cbnz(temp, slow_path->GetEntryLabel());
2202     __ Bind(slow_path->GetReturnLabel());
2203   } else {
2204     __ Cbz(temp, codegen_->GetLabelOf(successor));
2205     __ B(slow_path->GetEntryLabel());
2206     // slow_path will return to GetLabelOf(successor).
2207   }
2208 }
2209 
2210 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
2211                                                              CodeGeneratorARM64* codegen)
2212       : InstructionCodeGenerator(graph, codegen),
2213         assembler_(codegen->GetAssembler()),
2214         codegen_(codegen) {}
2215 
2216 #define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M)              \
2217   /* No unimplemented IR. */
2218 
2219 #define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode
2220 
2221 enum UnimplementedInstructionBreakCode {
2222   // Using a base helps identify when we hit such breakpoints.
2223   UnimplementedInstructionBreakCodeBaseCode = 0x900,
2224 #define ENUM_UNIMPLEMENTED_INSTRUCTION(name) UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name),
2225   FOR_EACH_UNIMPLEMENTED_INSTRUCTION(ENUM_UNIMPLEMENTED_INSTRUCTION)
2226 #undef ENUM_UNIMPLEMENTED_INSTRUCTION
2227 };
2228 
2229 #define DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS(name)                               \
2230   void InstructionCodeGeneratorARM64::Visit##name(H##name* instr ATTRIBUTE_UNUSED) {  \
2231     __ Brk(UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name));                               \
2232   }                                                                                   \
2233   void LocationsBuilderARM64::Visit##name(H##name* instr) {                           \
2234     LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); \
2235     locations->SetOut(Location::Any());                                               \
2236   }
2237   FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS)
2238 #undef DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS
2239 
2240 #undef UNIMPLEMENTED_INSTRUCTION_BREAK_CODE
2241 #undef FOR_EACH_UNIMPLEMENTED_INSTRUCTION
2242 
2243 void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
2244   DCHECK_EQ(instr->InputCount(), 2U);
2245   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
2246   Primitive::Type type = instr->GetResultType();
2247   switch (type) {
2248     case Primitive::kPrimInt:
2249     case Primitive::kPrimLong:
2250       locations->SetInAt(0, Location::RequiresRegister());
2251       locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
2252       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2253       break;
2254 
2255     case Primitive::kPrimFloat:
2256     case Primitive::kPrimDouble:
2257       locations->SetInAt(0, Location::RequiresFpuRegister());
2258       locations->SetInAt(1, Location::RequiresFpuRegister());
2259       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2260       break;
2261 
2262     default:
2263       LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type;
2264   }
2265 }
2266 
2267 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
2268                                            const FieldInfo& field_info) {
2269   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
2270 
2271   bool object_field_get_with_read_barrier =
2272       kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
2273   LocationSummary* locations =
2274       new (GetGraph()->GetArena()) LocationSummary(instruction,
2275                                                    object_field_get_with_read_barrier ?
2276                                                        LocationSummary::kCallOnSlowPath :
2277                                                        LocationSummary::kNoCall);
2278   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
2279     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2280     // We need a temporary register for the read barrier marking slow
2281     // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier.
2282     if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
2283         !Runtime::Current()->UseJitCompilation() &&
2284         !field_info.IsVolatile()) {
2285       // If link-time thunks for the Baker read barrier are enabled, for AOT
2286       // non-volatile loads we need a temporary only if the offset is too big.
2287       if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
2288         locations->AddTemp(FixedTempLocation());
2289       }
2290     } else {
2291       locations->AddTemp(Location::RequiresRegister());
2292     }
2293   }
2294   locations->SetInAt(0, Location::RequiresRegister());
2295   if (Primitive::IsFloatingPointType(instruction->GetType())) {
2296     locations->SetOut(Location::RequiresFpuRegister());
2297   } else {
2298     // The output overlaps for an object field get when read barriers
2299     // are enabled: we do not want the load to overwrite the object's
2300     // location, as we need it to emit the read barrier.
2301     locations->SetOut(
2302         Location::RequiresRegister(),
2303         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2304   }
2305 }
2306 
2307 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
2308                                                    const FieldInfo& field_info) {
2309   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
2310   LocationSummary* locations = instruction->GetLocations();
2311   Location base_loc = locations->InAt(0);
2312   Location out = locations->Out();
2313   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
2314   Primitive::Type field_type = field_info.GetFieldType();
2315   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
2316 
2317   if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2318     // Object FieldGet with Baker's read barrier case.
2319     // /* HeapReference<Object> */ out = *(base + offset)
2320     Register base = RegisterFrom(base_loc, Primitive::kPrimNot);
2321     Location maybe_temp =
2322         (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2323     // Note that potential implicit null checks are handled in this
2324     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
2325     codegen_->GenerateFieldLoadWithBakerReadBarrier(
2326         instruction,
2327         out,
2328         base,
2329         offset,
2330         maybe_temp,
2331         /* needs_null_check */ true,
2332         field_info.IsVolatile());
2333   } else {
2334     // General case.
2335     if (field_info.IsVolatile()) {
2336       // Note that a potential implicit null check is handled in this
2337       // CodeGeneratorARM64::LoadAcquire call.
2338       // NB: LoadAcquire will record the pc info if needed.
2339       codegen_->LoadAcquire(
2340           instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
2341     } else {
2342       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2343       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2344       codegen_->Load(field_type, OutputCPURegister(instruction), field);
2345       codegen_->MaybeRecordImplicitNullCheck(instruction);
2346     }
2347     if (field_type == Primitive::kPrimNot) {
2348       // If read barriers are enabled, emit read barriers other than
2349       // Baker's using a slow path (and also unpoison the loaded
2350       // reference, if heap poisoning is enabled).
2351       codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
2352     }
2353   }
2354 }
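// Illustrative note on the field-get paths above (a sketch, not exhaustive): for a
// volatile field, LoadAcquire is expected to emit an acquire load sized for the field
// type (e.g. LDARB/LDARH/LDAR), so no separate barrier is emitted here, while the
// non-volatile path uses a plain load inside an EmissionCheckScope so that the PC
// recorded by MaybeRecordImplicitNullCheck stays adjacent to the faulting load.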
2355 
2356 void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
2357   LocationSummary* locations =
2358       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
2359   locations->SetInAt(0, Location::RequiresRegister());
2360   if (IsConstantZeroBitPattern(instruction->InputAt(1))) {
2361     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
2362   } else if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
2363     locations->SetInAt(1, Location::RequiresFpuRegister());
2364   } else {
2365     locations->SetInAt(1, Location::RequiresRegister());
2366   }
2367 }
2368 
2369 void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
2370                                                    const FieldInfo& field_info,
2371                                                    bool value_can_be_null) {
2372   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
2373 
2374   Register obj = InputRegisterAt(instruction, 0);
2375   CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
2376   CPURegister source = value;
2377   Offset offset = field_info.GetFieldOffset();
2378   Primitive::Type field_type = field_info.GetFieldType();
2379 
2380   {
2381     // We use a block to end the scratch scope before the write barrier, thus
2382     // freeing the temporary registers so they can be used in `MarkGCCard`.
2383     UseScratchRegisterScope temps(GetVIXLAssembler());
2384 
2385     if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
2386       DCHECK(value.IsW());
2387       Register temp = temps.AcquireW();
2388       __ Mov(temp, value.W());
2389       GetAssembler()->PoisonHeapReference(temp.W());
2390       source = temp;
2391     }
2392 
2393     if (field_info.IsVolatile()) {
2394       codegen_->StoreRelease(
2395           instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check */ true);
2396     } else {
2397       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2398       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2399       codegen_->Store(field_type, source, HeapOperand(obj, offset));
2400       codegen_->MaybeRecordImplicitNullCheck(instruction);
2401     }
2402   }
2403 
2404   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
2405     codegen_->MarkGCCard(obj, Register(value), value_can_be_null);
2406   }
2407 }
2408 
2409 void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
2410   Primitive::Type type = instr->GetType();
2411 
2412   switch (type) {
2413     case Primitive::kPrimInt:
2414     case Primitive::kPrimLong: {
2415       Register dst = OutputRegister(instr);
2416       Register lhs = InputRegisterAt(instr, 0);
2417       Operand rhs = InputOperandAt(instr, 1);
2418       if (instr->IsAdd()) {
2419         __ Add(dst, lhs, rhs);
2420       } else if (instr->IsAnd()) {
2421         __ And(dst, lhs, rhs);
2422       } else if (instr->IsOr()) {
2423         __ Orr(dst, lhs, rhs);
2424       } else if (instr->IsSub()) {
2425         __ Sub(dst, lhs, rhs);
2426       } else if (instr->IsRor()) {
2427         if (rhs.IsImmediate()) {
2428           uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
2429           __ Ror(dst, lhs, shift);
2430         } else {
2431           // Ensure the shift distance is in a register of the same size as the result.
2432           // If we are rotating a long and the shift originally comes in a w register,
2433           // we don't need to sxtw it for use as an x register, since the shift distance
2434           // is always masked with reg_bits - 1.
2435           __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
2436         }
2437       } else {
2438         DCHECK(instr->IsXor());
2439         __ Eor(dst, lhs, rhs);
2440       }
2441       break;
2442     }
2443     case Primitive::kPrimFloat:
2444     case Primitive::kPrimDouble: {
2445       FPRegister dst = OutputFPRegister(instr);
2446       FPRegister lhs = InputFPRegisterAt(instr, 0);
2447       FPRegister rhs = InputFPRegisterAt(instr, 1);
2448       if (instr->IsAdd()) {
2449         __ Fadd(dst, lhs, rhs);
2450       } else if (instr->IsSub()) {
2451         __ Fsub(dst, lhs, rhs);
2452       } else {
2453         LOG(FATAL) << "Unexpected floating-point binary operation";
2454       }
2455       break;
2456     }
2457     default:
2458       LOG(FATAL) << "Unexpected binary operation type " << type;
2459   }
2460 }
2461 
2462 void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
2463   DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2464 
2465   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
2466   Primitive::Type type = instr->GetResultType();
2467   switch (type) {
2468     case Primitive::kPrimInt:
2469     case Primitive::kPrimLong: {
2470       locations->SetInAt(0, Location::RequiresRegister());
2471       locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
2472       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2473       break;
2474     }
2475     default:
2476       LOG(FATAL) << "Unexpected shift type " << type;
2477   }
2478 }
2479 
2480 void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
2481   DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2482 
2483   Primitive::Type type = instr->GetType();
2484   switch (type) {
2485     case Primitive::kPrimInt:
2486     case Primitive::kPrimLong: {
2487       Register dst = OutputRegister(instr);
2488       Register lhs = InputRegisterAt(instr, 0);
2489       Operand rhs = InputOperandAt(instr, 1);
2490       if (rhs.IsImmediate()) {
2491         uint32_t shift_value = rhs.GetImmediate() &
2492             (type == Primitive::kPrimInt ? kMaxIntShiftDistance : kMaxLongShiftDistance);
2493         if (instr->IsShl()) {
2494           __ Lsl(dst, lhs, shift_value);
2495         } else if (instr->IsShr()) {
2496           __ Asr(dst, lhs, shift_value);
2497         } else {
2498           __ Lsr(dst, lhs, shift_value);
2499         }
2500       } else {
2501         Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();
2502 
2503         if (instr->IsShl()) {
2504           __ Lsl(dst, lhs, rhs_reg);
2505         } else if (instr->IsShr()) {
2506           __ Asr(dst, lhs, rhs_reg);
2507         } else {
2508           __ Lsr(dst, lhs, rhs_reg);
2509         }
2510       }
2511       break;
2512     }
2513     default:
2514       LOG(FATAL) << "Unexpected shift operation type " << type;
2515   }
2516 }
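// Worked example for the immediate case above: Java shift distances use only the low
// 5 bits for ints and the low 6 bits for longs, which is what the masking with
// kMaxIntShiftDistance / kMaxLongShiftDistance implements. For instance, an int
// `x << 35` is emitted as `Lsl(dst, lhs, 35 & 31)`, i.e. a shift by 3.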
2517 
2518 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
2519   HandleBinaryOp(instruction);
2520 }
2521 
2522 void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) {
2523   HandleBinaryOp(instruction);
2524 }
2525 
2526 void LocationsBuilderARM64::VisitAnd(HAnd* instruction) {
2527   HandleBinaryOp(instruction);
2528 }
2529 
2530 void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) {
2531   HandleBinaryOp(instruction);
2532 }
2533 
2534 void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2535   DCHECK(Primitive::IsIntegralType(instr->GetType())) << instr->GetType();
2536   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
2537   locations->SetInAt(0, Location::RequiresRegister());
2538   // There is no immediate variant of negated bitwise instructions in AArch64.
2539   locations->SetInAt(1, Location::RequiresRegister());
2540   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2541 }
2542 
2543 void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2544   Register dst = OutputRegister(instr);
2545   Register lhs = InputRegisterAt(instr, 0);
2546   Register rhs = InputRegisterAt(instr, 1);
2547 
2548   switch (instr->GetOpKind()) {
2549     case HInstruction::kAnd:
2550       __ Bic(dst, lhs, rhs);
2551       break;
2552     case HInstruction::kOr:
2553       __ Orn(dst, lhs, rhs);
2554       break;
2555     case HInstruction::kXor:
2556       __ Eon(dst, lhs, rhs);
2557       break;
2558     default:
2559       LOG(FATAL) << "Unreachable";
2560   }
2561 }
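// For reference, the negated-operand instructions selected above compute:
//   Bic: dst = lhs & ~rhs
//   Orn: dst = lhs | ~rhs
//   Eon: dst = lhs ^ ~rhs
// which is why the second input must be in a register (see the locations builder above).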
2562 
2563 void LocationsBuilderARM64::VisitDataProcWithShifterOp(
2564     HDataProcWithShifterOp* instruction) {
2565   DCHECK(instruction->GetType() == Primitive::kPrimInt ||
2566          instruction->GetType() == Primitive::kPrimLong);
2567   LocationSummary* locations =
2568       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
2569   if (instruction->GetInstrKind() == HInstruction::kNeg) {
2570     locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
2571   } else {
2572     locations->SetInAt(0, Location::RequiresRegister());
2573   }
2574   locations->SetInAt(1, Location::RequiresRegister());
2575   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2576 }
2577 
2578 void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp(
2579     HDataProcWithShifterOp* instruction) {
2580   Primitive::Type type = instruction->GetType();
2581   HInstruction::InstructionKind kind = instruction->GetInstrKind();
2582   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
2583   Register out = OutputRegister(instruction);
2584   Register left;
2585   if (kind != HInstruction::kNeg) {
2586     left = InputRegisterAt(instruction, 0);
2587   }
2588   // If this `HDataProcWithShifterOp` was created by merging a type conversion as the
2589   // shifter operand operation, the IR generating `right_reg` (input to the type
2590   // conversion) can have a different type from the current instruction's type,
2591   // so we manually indicate the type.
2592   Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
2593   Operand right_operand(0);
2594 
2595   HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
2596   if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
2597     right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
2598   } else {
2599     right_operand = Operand(right_reg,
2600                             helpers::ShiftFromOpKind(op_kind),
2601                             instruction->GetShiftAmount());
2602   }
2603 
2604   // Logical binary operations do not support extension operations in the
2605   // operand. Note that VIXL would still manage if one were passed, by generating
2606   // the extension as a separate instruction.
2607   // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
2608   DCHECK(!right_operand.IsExtendedRegister() ||
2609          (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor &&
2610           kind != HInstruction::kNeg));
2611   switch (kind) {
2612     case HInstruction::kAdd:
2613       __ Add(out, left, right_operand);
2614       break;
2615     case HInstruction::kAnd:
2616       __ And(out, left, right_operand);
2617       break;
2618     case HInstruction::kNeg:
2619       DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero());
2620       __ Neg(out, right_operand);
2621       break;
2622     case HInstruction::kOr:
2623       __ Orr(out, left, right_operand);
2624       break;
2625     case HInstruction::kSub:
2626       __ Sub(out, left, right_operand);
2627       break;
2628     case HInstruction::kXor:
2629       __ Eor(out, left, right_operand);
2630       break;
2631     default:
2632       LOG(FATAL) << "Unexpected operation kind: " << kind;
2633       UNREACHABLE();
2634   }
2635 }
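// Illustrative example (register numbers hypothetical): an HAdd whose right input was
// merged with an HShl by 3 is emitted as a single `add w0, w1, w2, LSL #3`, computing
// out = left + (right << 3); a merged type conversion instead yields an extended
// operand such as `add x0, x1, w2, SXTW`.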
2636 
2637 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2638   LocationSummary* locations =
2639       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
2640   locations->SetInAt(0, Location::RequiresRegister());
2641   locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
2642   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2643 }
2644 
2645 void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2646   __ Add(OutputRegister(instruction),
2647          InputRegisterAt(instruction, 0),
2648          Operand(InputOperandAt(instruction, 1)));
2649 }
2650 
2651 void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2652   LocationSummary* locations =
2653       new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
2654   HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2655   if (instr->GetOpKind() == HInstruction::kSub &&
2656       accumulator->IsConstant() &&
2657       accumulator->AsConstant()->IsArithmeticZero()) {
2658     // Don't allocate a register for the Mneg instruction.
2659   } else {
2660     locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
2661                        Location::RequiresRegister());
2662   }
2663   locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
2664   locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
2665   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2666 }
2667 
2668 void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2669   Register res = OutputRegister(instr);
2670   Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
2671   Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
2672 
2673   // Avoid emitting code that could trigger Cortex A53's erratum 835769.
2674   // This fixup should be carried out for all multiply-accumulate instructions:
2675   // madd, msub, smaddl, smsubl, umaddl and umsubl.
2676   if (instr->GetType() == Primitive::kPrimLong &&
2677       codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
2678     MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
2679     vixl::aarch64::Instruction* prev =
2680         masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize;
2681     if (prev->IsLoadOrStore()) {
2682       // Make sure we emit exactly one nop.
2683       ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2684       __ nop();
2685     }
2686   }
2687 
2688   if (instr->GetOpKind() == HInstruction::kAdd) {
2689     Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2690     __ Madd(res, mul_left, mul_right, accumulator);
2691   } else {
2692     DCHECK(instr->GetOpKind() == HInstruction::kSub);
2693     HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2694     if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) {
2695       __ Mneg(res, mul_left, mul_right);
2696     } else {
2697       Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2698       __ Msub(res, mul_left, mul_right, accumulator);
2699     }
2700   }
2701 }
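// Illustrative sequence for the erratum workaround above (registers hypothetical): if
// the previously emitted instruction was a load or store, e.g. `ldr x3, [x4]`, and this
// instruction is a 64-bit multiply-accumulate, the generated code becomes
//   ldr x3, [x4]
//   nop
//   msub x0, x1, x2, x5
// so the multiply-accumulate never directly follows a memory access.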
2702 
2703 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
2704   bool object_array_get_with_read_barrier =
2705       kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
2706   LocationSummary* locations =
2707       new (GetGraph()->GetArena()) LocationSummary(instruction,
2708                                                    object_array_get_with_read_barrier ?
2709                                                        LocationSummary::kCallOnSlowPath :
2710                                                        LocationSummary::kNoCall);
2711   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
2712     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2713     // We need a temporary register for the read barrier marking slow
2714     // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier.
2715     if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
2716         !Runtime::Current()->UseJitCompilation() &&
2717         instruction->GetIndex()->IsConstant()) {
2718       // Array loads with constant index are treated as field loads.
2719       // If link-time thunks for the Baker read barrier are enabled, for AOT
2720       // constant index loads we need a temporary only if the offset is too big.
2721       uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2722       uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
2723       offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot);
2724       if (offset >= kReferenceLoadMinFarOffset) {
2725         locations->AddTemp(FixedTempLocation());
2726       }
2727     } else {
2728       locations->AddTemp(Location::RequiresRegister());
2729     }
2730   }
2731   locations->SetInAt(0, Location::RequiresRegister());
2732   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2733   if (Primitive::IsFloatingPointType(instruction->GetType())) {
2734     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2735   } else {
2736     // The output overlaps in the case of an object array get with
2737     // read barriers enabled: we do not want the move to overwrite the
2738     // array's location, as we need it to emit the read barrier.
2739     locations->SetOut(
2740         Location::RequiresRegister(),
2741         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2742   }
2743 }
2744 
2745 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
2746   Primitive::Type type = instruction->GetType();
2747   Register obj = InputRegisterAt(instruction, 0);
2748   LocationSummary* locations = instruction->GetLocations();
2749   Location index = locations->InAt(1);
2750   Location out = locations->Out();
2751   uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2752   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
2753                                         instruction->IsStringCharAt();
2754   MacroAssembler* masm = GetVIXLAssembler();
2755   UseScratchRegisterScope temps(masm);
2756 
2757   // The read barrier instrumentation of object ArrayGet instructions
2758   // does not support the HIntermediateAddress instruction.
2759   DCHECK(!((type == Primitive::kPrimNot) &&
2760            instruction->GetArray()->IsIntermediateAddress() &&
2761            kEmitCompilerReadBarrier));
2762 
2763   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2764     // Object ArrayGet with Baker's read barrier case.
2765     // Note that a potential implicit null check is handled in the
2766     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
2767     if (index.IsConstant()) {
2768       // Array load with a constant index can be treated as a field load.
2769       offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
2770       Location maybe_temp =
2771           (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2772       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
2773                                                       out,
2774                                                       obj.W(),
2775                                                       offset,
2776                                                       maybe_temp,
2777                                                       /* needs_null_check */ true,
2778                                                       /* use_load_acquire */ false);
2779     } else {
2780       Register temp = WRegisterFrom(locations->GetTemp(0));
2781       codegen_->GenerateArrayLoadWithBakerReadBarrier(
2782           instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
2783     }
2784   } else {
2785     // General case.
2786     MemOperand source = HeapOperand(obj);
2787     Register length;
2788     if (maybe_compressed_char_at) {
2789       uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2790       length = temps.AcquireW();
2791       {
2792         // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2793         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2794 
2795         if (instruction->GetArray()->IsIntermediateAddress()) {
2796           DCHECK_LT(count_offset, offset);
2797           int64_t adjusted_offset =
2798               static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
2799           // Note that `adjusted_offset` is negative, so this will be a LDUR.
2800           __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
2801         } else {
2802           __ Ldr(length, HeapOperand(obj, count_offset));
2803         }
2804         codegen_->MaybeRecordImplicitNullCheck(instruction);
2805       }
2806     }
2807     if (index.IsConstant()) {
2808       if (maybe_compressed_char_at) {
2809         vixl::aarch64::Label uncompressed_load, done;
2810         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2811                       "Expecting 0=compressed, 1=uncompressed");
2812         __ Tbnz(length.W(), 0, &uncompressed_load);
2813         __ Ldrb(Register(OutputCPURegister(instruction)),
2814                 HeapOperand(obj, offset + Int64ConstantFrom(index)));
2815         __ B(&done);
2816         __ Bind(&uncompressed_load);
2817         __ Ldrh(Register(OutputCPURegister(instruction)),
2818                 HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1)));
2819         __ Bind(&done);
2820       } else {
2821         offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
2822         source = HeapOperand(obj, offset);
2823       }
2824     } else {
2825       Register temp = temps.AcquireSameSizeAs(obj);
2826       if (instruction->GetArray()->IsIntermediateAddress()) {
2827         // We do not need to compute the intermediate address from the array: the
2828         // input instruction has done it already. See the comment in
2829         // `TryExtractArrayAccessAddress()`.
2830         if (kIsDebugBuild) {
2831           HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
2832           DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
2833         }
2834         temp = obj;
2835       } else {
2836         __ Add(temp, obj, offset);
2837       }
2838       if (maybe_compressed_char_at) {
2839         vixl::aarch64::Label uncompressed_load, done;
2840         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2841                       "Expecting 0=compressed, 1=uncompressed");
2842         __ Tbnz(length.W(), 0, &uncompressed_load);
2843         __ Ldrb(Register(OutputCPURegister(instruction)),
2844                 HeapOperand(temp, XRegisterFrom(index), LSL, 0));
2845         __ B(&done);
2846         __ Bind(&uncompressed_load);
2847         __ Ldrh(Register(OutputCPURegister(instruction)),
2848                 HeapOperand(temp, XRegisterFrom(index), LSL, 1));
2849         __ Bind(&done);
2850       } else {
2851         source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
2852       }
2853     }
2854     if (!maybe_compressed_char_at) {
2855       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2856       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2857       codegen_->Load(type, OutputCPURegister(instruction), source);
2858       codegen_->MaybeRecordImplicitNullCheck(instruction);
2859     }
2860 
2861     if (type == Primitive::kPrimNot) {
2862       static_assert(
2863           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
2864           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
2865       Location obj_loc = locations->InAt(0);
2866       if (index.IsConstant()) {
2867         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
2868       } else {
2869         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
2870       }
2871     }
2872   }
2873 }
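// Worked example for the compressed-string path above: String.charAt(i) tests bit 0 of
// the count field (0 = compressed, 1 = uncompressed, per the static_asserts); a
// compressed string then loads one byte at data_offset + i, while an uncompressed
// string loads a half-word at data_offset + 2 * i.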
2874 
2875 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
2876   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
2877   locations->SetInAt(0, Location::RequiresRegister());
2878   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2879 }
2880 
2881 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
2882   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
2883   vixl::aarch64::Register out = OutputRegister(instruction);
2884   {
2885     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2886     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2887     __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
2888     codegen_->MaybeRecordImplicitNullCheck(instruction);
2889   }
2890   // Mask out compression flag from String's array length.
2891   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
2892     __ Lsr(out.W(), out.W(), 1u);
2893   }
2894 }
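// Note on the Lsr above: with string compression enabled, the String count field holds
// (length << 1) | compression_flag with the flag in bit 0, so shifting right by one
// recovers the actual character count regardless of the flag value.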
2895 
2896 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
2897   Primitive::Type value_type = instruction->GetComponentType();
2898 
2899   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
2900   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
2901       instruction,
2902       may_need_runtime_call_for_type_check ?
2903           LocationSummary::kCallOnSlowPath :
2904           LocationSummary::kNoCall);
2905   locations->SetInAt(0, Location::RequiresRegister());
2906   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2907   if (IsConstantZeroBitPattern(instruction->InputAt(2))) {
2908     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
2909   } else if (Primitive::IsFloatingPointType(value_type)) {
2910     locations->SetInAt(2, Location::RequiresFpuRegister());
2911   } else {
2912     locations->SetInAt(2, Location::RequiresRegister());
2913   }
2914 }
2915 
2916 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
2917   Primitive::Type value_type = instruction->GetComponentType();
2918   LocationSummary* locations = instruction->GetLocations();
2919   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
2920   bool needs_write_barrier =
2921       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
2922 
2923   Register array = InputRegisterAt(instruction, 0);
2924   CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2);
2925   CPURegister source = value;
2926   Location index = locations->InAt(1);
2927   size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
2928   MemOperand destination = HeapOperand(array);
2929   MacroAssembler* masm = GetVIXLAssembler();
2930 
2931   if (!needs_write_barrier) {
2932     DCHECK(!may_need_runtime_call_for_type_check);
2933     if (index.IsConstant()) {
2934       offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
2935       destination = HeapOperand(array, offset);
2936     } else {
2937       UseScratchRegisterScope temps(masm);
2938       Register temp = temps.AcquireSameSizeAs(array);
2939       if (instruction->GetArray()->IsIntermediateAddress()) {
2940         // We do not need to compute the intermediate address from the array: the
2941         // input instruction has done it already. See the comment in
2942         // `TryExtractArrayAccessAddress()`.
2943         if (kIsDebugBuild) {
2944           HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
2945           DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
2946         }
2947         temp = array;
2948       } else {
2949         __ Add(temp, array, offset);
2950       }
2951       destination = HeapOperand(temp,
2952                                 XRegisterFrom(index),
2953                                 LSL,
2954                                 Primitive::ComponentSizeShift(value_type));
2955     }
2956     {
2957       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2958       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2959       codegen_->Store(value_type, value, destination);
2960       codegen_->MaybeRecordImplicitNullCheck(instruction);
2961     }
2962   } else {
2963     DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2964     vixl::aarch64::Label done;
2965     SlowPathCodeARM64* slow_path = nullptr;
2966     {
2967       // We use a block to end the scratch scope before the write barrier, thus
2968       // freeing the temporary registers so they can be used in `MarkGCCard`.
2969       UseScratchRegisterScope temps(masm);
2970       Register temp = temps.AcquireSameSizeAs(array);
2971       if (index.IsConstant()) {
2972         offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
2973         destination = HeapOperand(array, offset);
2974       } else {
2975         destination = HeapOperand(temp,
2976                                   XRegisterFrom(index),
2977                                   LSL,
2978                                   Primitive::ComponentSizeShift(value_type));
2979       }
2980 
2981       uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2982       uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2983       uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2984 
2985       if (may_need_runtime_call_for_type_check) {
2986         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction);
2987         codegen_->AddSlowPath(slow_path);
2988         if (instruction->GetValueCanBeNull()) {
2989           vixl::aarch64::Label non_zero;
2990           __ Cbnz(Register(value), &non_zero);
2991           if (!index.IsConstant()) {
2992             __ Add(temp, array, offset);
2993           }
2994           {
2995             // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools
2996             // emitted.
2997             EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2998             __ Str(wzr, destination);
2999             codegen_->MaybeRecordImplicitNullCheck(instruction);
3000           }
3001           __ B(&done);
3002           __ Bind(&non_zero);
3003         }
3004 
3005         // Note that when Baker read barriers are enabled, the type
3006         // checks are performed without read barriers.  This is fine,
3007         // even in the case where a class object is in the from-space
3008         // after the flip, as a comparison involving such a type would
3009         // not produce a false positive; it may of course produce a
3010         // false negative, in which case we would take the ArraySet
3011         // slow path.
3012 
3013         Register temp2 = temps.AcquireSameSizeAs(array);
3014         // /* HeapReference<Class> */ temp = array->klass_
3015         {
3016           // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
3017           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
3018           __ Ldr(temp, HeapOperand(array, class_offset));
3019           codegen_->MaybeRecordImplicitNullCheck(instruction);
3020         }
3021         GetAssembler()->MaybeUnpoisonHeapReference(temp);
3022 
3023         // /* HeapReference<Class> */ temp = temp->component_type_
3024         __ Ldr(temp, HeapOperand(temp, component_offset));
3025         // /* HeapReference<Class> */ temp2 = value->klass_
3026         __ Ldr(temp2, HeapOperand(Register(value), class_offset));
3027         // If heap poisoning is enabled, no need to unpoison `temp`
3028         // nor `temp2`, as we are comparing two poisoned references.
3029         __ Cmp(temp, temp2);
3030         temps.Release(temp2);
3031 
3032         if (instruction->StaticTypeOfArrayIsObjectArray()) {
3033           vixl::aarch64::Label do_put;
3034           __ B(eq, &do_put);
3035           // If heap poisoning is enabled, the `temp` reference has
3036           // not been unpoisoned yet; unpoison it now.
3037           GetAssembler()->MaybeUnpoisonHeapReference(temp);
3038 
3039           // /* HeapReference<Class> */ temp = temp->super_class_
3040           __ Ldr(temp, HeapOperand(temp, super_offset));
3041           // If heap poisoning is enabled, no need to unpoison
3042           // `temp`, as we are comparing against null below.
3043           __ Cbnz(temp, slow_path->GetEntryLabel());
3044           __ Bind(&do_put);
3045         } else {
3046           __ B(ne, slow_path->GetEntryLabel());
3047         }
3048       }
3049 
3050       if (kPoisonHeapReferences) {
3051         Register temp2 = temps.AcquireSameSizeAs(array);
3052         DCHECK(value.IsW());
3053         __ Mov(temp2, value.W());
3054         GetAssembler()->PoisonHeapReference(temp2);
3055         source = temp2;
3056       }
3057 
3058       if (!index.IsConstant()) {
3059         __ Add(temp, array, offset);
3060       } else {
3061         // We no longer need the `temp` here so release it as the store below may
3062         // need a scratch register (if the constant index makes the offset too large)
3063         // and the poisoned `source` could be using the other scratch register.
3064         temps.Release(temp);
3065       }
3066       {
3067         // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
3068         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
3069         __ Str(source, destination);
3070 
3071         if (!may_need_runtime_call_for_type_check) {
3072           codegen_->MaybeRecordImplicitNullCheck(instruction);
3073         }
3074       }
3075     }
3076 
3077     codegen_->MarkGCCard(array, value.W(), instruction->GetValueCanBeNull());
3078 
3079     if (done.IsLinked()) {
3080       __ Bind(&done);
3081     }
3082 
3083     if (slow_path != nullptr) {
3084       __ Bind(slow_path->GetExitLabel());
3085     }
3086   }
3087 }
3088 
3089 void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
3090   RegisterSet caller_saves = RegisterSet::Empty();
3091   InvokeRuntimeCallingConvention calling_convention;
3092   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3093   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode()));
3094   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
3095   locations->SetInAt(0, Location::RequiresRegister());
3096   locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
3097 }
3098 
3099 void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
3100   BoundsCheckSlowPathARM64* slow_path =
3101       new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(instruction);
3102   codegen_->AddSlowPath(slow_path);
3103   __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
3104   __ B(slow_path->GetEntryLabel(), hs);
3105 }
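// The single unsigned comparison above also rejects negative indices: `hs` is an
// unsigned check, and a negative index reinterpreted as an unsigned value is larger
// than any possible array length, so it branches to the slow path as well.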
3106 
3107 void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
3108   LocationSummary* locations =
3109       new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
3110   locations->SetInAt(0, Location::RequiresRegister());
3111   if (check->HasUses()) {
3112     locations->SetOut(Location::SameAsFirstInput());
3113   }
3114 }
3115 
3116 void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
3117   // We assume the class is not null.
3118   SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
3119       check->GetLoadClass(), check, check->GetDexPc(), true);
3120   codegen_->AddSlowPath(slow_path);
3121   GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
3122 }
3123 
3124 static bool IsFloatingPointZeroConstant(HInstruction* inst) {
3125   return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero()))
3126       || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero()));
3127 }
3128 
3129 void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) {
3130   FPRegister lhs_reg = InputFPRegisterAt(instruction, 0);
3131   Location rhs_loc = instruction->GetLocations()->InAt(1);
3132   if (rhs_loc.IsConstant()) {
3133     // 0.0 is the only immediate that can be encoded directly in
3134     // an FCMP instruction.
3135     //
3136     // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
3137     // specify that in a floating-point comparison, positive zero
3138     // and negative zero are considered equal, so we can use the
3139     // literal 0.0 for both cases here.
3140     //
3141     // Note however that some methods (Float.equal, Float.compare,
3142     // Float.compareTo, Double.equal, Double.compare,
3143     // Double.compareTo, Math.max, Math.min, StrictMath.max,
3144     // StrictMath.min) consider 0.0 to be (strictly) greater than
3145     // -0.0. So if we ever translate calls to these methods into a
3146     // HCompare instruction, we must handle the -0.0 case with
3147     // care here.
3148     DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant()));
3149     __ Fcmp(lhs_reg, 0.0);
3150   } else {
3151     __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1));
3152   }
3153 }
3154 
3155 void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
3156   LocationSummary* locations =
3157       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
3158   Primitive::Type in_type = compare->InputAt(0)->GetType();
3159   switch (in_type) {
3160     case Primitive::kPrimBoolean:
3161     case Primitive::kPrimByte:
3162     case Primitive::kPrimShort:
3163     case Primitive::kPrimChar:
3164     case Primitive::kPrimInt:
3165     case Primitive::kPrimLong: {
3166       locations->SetInAt(0, Location::RequiresRegister());
3167       locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
3168       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3169       break;
3170     }
3171     case Primitive::kPrimFloat:
3172     case Primitive::kPrimDouble: {
3173       locations->SetInAt(0, Location::RequiresFpuRegister());
3174       locations->SetInAt(1,
3175                          IsFloatingPointZeroConstant(compare->InputAt(1))
3176                              ? Location::ConstantLocation(compare->InputAt(1)->AsConstant())
3177                              : Location::RequiresFpuRegister());
3178       locations->SetOut(Location::RequiresRegister());
3179       break;
3180     }
3181     default:
3182       LOG(FATAL) << "Unexpected type for compare operation " << in_type;
3183   }
3184 }
3185 
3186 void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
3187   Primitive::Type in_type = compare->InputAt(0)->GetType();
3188 
3189   //  0 if: left == right
3190   //  1 if: left  > right
3191   // -1 if: left  < right
3192   switch (in_type) {
3193     case Primitive::kPrimBoolean:
3194     case Primitive::kPrimByte:
3195     case Primitive::kPrimShort:
3196     case Primitive::kPrimChar:
3197     case Primitive::kPrimInt:
3198     case Primitive::kPrimLong: {
3199       Register result = OutputRegister(compare);
3200       Register left = InputRegisterAt(compare, 0);
3201       Operand right = InputOperandAt(compare, 1);
3202       __ Cmp(left, right);
3203       __ Cset(result, ne);          // result == +1 if NE or 0 otherwise
3204       __ Cneg(result, result, lt);  // result == -1 if LT or unchanged otherwise
3205       break;
3206     }
3207     case Primitive::kPrimFloat:
3208     case Primitive::kPrimDouble: {
3209       Register result = OutputRegister(compare);
3210       GenerateFcmp(compare);
3211       __ Cset(result, ne);
3212       __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
3213       break;
3214     }
3215     default:
3216       LOG(FATAL) << "Unimplemented compare type " << in_type;
3217   }
3218 }
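// Worked example for the integer case above: comparing left = 5 with right = 9,
// `Cmp` sets flags for 5 - 9, `Cset(result, ne)` writes 1 (the operands differ), and
// `Cneg(result, result, lt)` negates it to -1 because left < right. Equal operands
// leave the result at 0; left > right keeps it at +1.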
3219 
3220 void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
3221   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
3222 
3223   if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3224     locations->SetInAt(0, Location::RequiresFpuRegister());
3225     locations->SetInAt(1,
3226                        IsFloatingPointZeroConstant(instruction->InputAt(1))
3227                            ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant())
3228                            : Location::RequiresFpuRegister());
3229   } else {
3230     // Integer cases.
3231     locations->SetInAt(0, Location::RequiresRegister());
3232     locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
3233   }
3234 
3235   if (!instruction->IsEmittedAtUseSite()) {
3236     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3237   }
3238 }
3239 
3240 void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
3241   if (instruction->IsEmittedAtUseSite()) {
3242     return;
3243   }
3244 
3245   LocationSummary* locations = instruction->GetLocations();
3246   Register res = RegisterFrom(locations->Out(), instruction->GetType());
3247   IfCondition if_cond = instruction->GetCondition();
3248 
3249   if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3250     GenerateFcmp(instruction);
3251     __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
3252   } else {
3253     // Integer cases.
3254     Register lhs = InputRegisterAt(instruction, 0);
3255     Operand rhs = InputOperandAt(instruction, 1);
3256     __ Cmp(lhs, rhs);
3257     __ Cset(res, ARM64Condition(if_cond));
3258   }
3259 }
3260 
3261 #define FOR_EACH_CONDITION_INSTRUCTION(M)                                                \
3262   M(Equal)                                                                               \
3263   M(NotEqual)                                                                            \
3264   M(LessThan)                                                                            \
3265   M(LessThanOrEqual)                                                                     \
3266   M(GreaterThan)                                                                         \
3267   M(GreaterThanOrEqual)                                                                  \
3268   M(Below)                                                                               \
3269   M(BelowOrEqual)                                                                        \
3270   M(Above)                                                                               \
3271   M(AboveOrEqual)
3272 #define DEFINE_CONDITION_VISITORS(Name)                                                  \
3273 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }         \
3274 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
3275 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
3276 #undef DEFINE_CONDITION_VISITORS
3277 #undef FOR_EACH_CONDITION_INSTRUCTION
3278 
3279 void InstructionCodeGeneratorARM64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3280   DCHECK(instruction->IsDiv() || instruction->IsRem());
3281 
3282   LocationSummary* locations = instruction->GetLocations();
3283   Location second = locations->InAt(1);
3284   DCHECK(second.IsConstant());
3285 
3286   Register out = OutputRegister(instruction);
3287   Register dividend = InputRegisterAt(instruction, 0);
3288   int64_t imm = Int64FromConstant(second.GetConstant());
3289   DCHECK(imm == 1 || imm == -1);
3290 
3291   if (instruction->IsRem()) {
3292     __ Mov(out, 0);
3293   } else {
3294     if (imm == 1) {
3295       __ Mov(out, dividend);
3296     } else {
3297       __ Neg(out, dividend);
3298     }
3299   }
3300 }
3301 
3302 void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
3303   DCHECK(instruction->IsDiv() || instruction->IsRem());
3304 
3305   LocationSummary* locations = instruction->GetLocations();
3306   Location second = locations->InAt(1);
3307   DCHECK(second.IsConstant());
3308 
3309   Register out = OutputRegister(instruction);
3310   Register dividend = InputRegisterAt(instruction, 0);
3311   int64_t imm = Int64FromConstant(second.GetConstant());
3312   uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
3313   int ctz_imm = CTZ(abs_imm);
3314 
3315   UseScratchRegisterScope temps(GetVIXLAssembler());
3316   Register temp = temps.AcquireSameSizeAs(out);
3317 
3318   if (instruction->IsDiv()) {
3319     __ Add(temp, dividend, abs_imm - 1);
3320     __ Cmp(dividend, 0);
3321     __ Csel(out, temp, dividend, lt);
3322     if (imm > 0) {
3323       __ Asr(out, out, ctz_imm);
3324     } else {
3325       __ Neg(out, Operand(out, ASR, ctz_imm));
3326     }
3327   } else {
3328     int bits = instruction->GetResultType() == Primitive::kPrimInt ? 32 : 64;
3329     __ Asr(temp, dividend, bits - 1);
3330     __ Lsr(temp, temp, bits - ctz_imm);
3331     __ Add(out, dividend, temp);
3332     __ And(out, out, abs_imm - 1);
3333     __ Sub(out, out, temp);
3334   }
3335 }
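// Worked example, assuming imm == 4 (abs_imm - 1 == 3, ctz_imm == 2):
//   Div, dividend = -7: temp = -7 + 3 = -4; dividend < 0 selects temp; -4 >> 2 = -1.
//   Div, dividend =  7: dividend >= 0 keeps 7; 7 >> 2 = 1.
//   Rem, dividend = -7 (int): temp is -1 after the Asr and 3 after the Lsr;
//   (-7 + 3) & 3 = 0; 0 - 3 = -3, matching Java semantics where -7 % 4 == -3.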
3336 
3337 void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3338   DCHECK(instruction->IsDiv() || instruction->IsRem());
3339 
3340   LocationSummary* locations = instruction->GetLocations();
3341   Location second = locations->InAt(1);
3342   DCHECK(second.IsConstant());
3343 
3344   Register out = OutputRegister(instruction);
3345   Register dividend = InputRegisterAt(instruction, 0);
3346   int64_t imm = Int64FromConstant(second.GetConstant());
3347 
3348   Primitive::Type type = instruction->GetResultType();
3349   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
3350 
3351   int64_t magic;
3352   int shift;
3353   CalculateMagicAndShiftForDivRem(imm, type == Primitive::kPrimLong /* is_long */, &magic, &shift);
3354 
3355   UseScratchRegisterScope temps(GetVIXLAssembler());
3356   Register temp = temps.AcquireSameSizeAs(out);
3357 
3358   // temp = get_high(dividend * magic)
3359   __ Mov(temp, magic);
3360   if (type == Primitive::kPrimLong) {
3361     __ Smulh(temp, dividend, temp);
3362   } else {
3363     __ Smull(temp.X(), dividend, temp);
3364     __ Lsr(temp.X(), temp.X(), 32);
3365   }
3366 
3367   if (imm > 0 && magic < 0) {
3368     __ Add(temp, temp, dividend);
3369   } else if (imm < 0 && magic > 0) {
3370     __ Sub(temp, temp, dividend);
3371   }
3372 
3373   if (shift != 0) {
3374     __ Asr(temp, temp, shift);
3375   }
3376 
3377   if (instruction->IsDiv()) {
3378     __ Sub(out, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31));
3379   } else {
3380     __ Sub(temp, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31));
3381     // TODO: Strength reduction for msub.
3382     Register temp_imm = temps.AcquireSameSizeAs(out);
3383     __ Mov(temp_imm, imm);
3384     __ Msub(out, temp, temp_imm, dividend);
3385   }
3386 }
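// Sketch of the magic-number technique above (constants from Hacker's Delight): the
// quotient is the high half of dividend * magic, corrected by +/- dividend when the
// signs of imm and magic differ, shifted right by `shift`, and then incremented by one
// for negative intermediate results (the final Sub of the arithmetically shifted sign)
// so quotients round toward zero. For example, a 32-bit division by 7 is believed to
// use magic == 0x92492493 (negative as an int32) with shift == 2, so the
// `Add(temp, temp, dividend)` correction applies. Remainders then follow as
// out = dividend - quotient * imm via the Msub.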
3387 
3388 void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3389   DCHECK(instruction->IsDiv() || instruction->IsRem());
3390   Primitive::Type type = instruction->GetResultType();
3391   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
3392 
3393   LocationSummary* locations = instruction->GetLocations();
3394   Register out = OutputRegister(instruction);
3395   Location second = locations->InAt(1);
3396 
3397   if (second.IsConstant()) {
3398     int64_t imm = Int64FromConstant(second.GetConstant());
3399 
3400     if (imm == 0) {
3401       // Do not generate anything. DivZeroCheck would prevent any code to be executed.
3402     } else if (imm == 1 || imm == -1) {
3403       DivRemOneOrMinusOne(instruction);
3404     } else if (IsPowerOfTwo(AbsOrMin(imm))) {
3405       DivRemByPowerOfTwo(instruction);
3406     } else {
3407       DCHECK(imm <= -2 || imm >= 2);
3408       GenerateDivRemWithAnyConstant(instruction);
3409     }
3410   } else {
3411     Register dividend = InputRegisterAt(instruction, 0);
3412     Register divisor = InputRegisterAt(instruction, 1);
3413     if (instruction->IsDiv()) {
3414       __ Sdiv(out, dividend, divisor);
3415     } else {
3416       UseScratchRegisterScope temps(GetVIXLAssembler());
3417       Register temp = temps.AcquireSameSizeAs(out);
3418       __ Sdiv(temp, dividend, divisor);
3419       __ Msub(out, temp, divisor, dividend);
3420     }
3421   }
3422 }
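// For the non-constant case above, the remainder falls out of the identity
// rem = dividend - (dividend / divisor) * divisor, which is exactly what the Sdiv
// followed by Msub computes.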
3423 
3424 void LocationsBuilderARM64::VisitDiv(HDiv* div) {
3425   LocationSummary* locations =
3426       new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
3427   switch (div->GetResultType()) {
3428     case Primitive::kPrimInt:
3429     case Primitive::kPrimLong:
3430       locations->SetInAt(0, Location::RequiresRegister());
3431       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3432       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3433       break;
3434 
3435     case Primitive::kPrimFloat:
3436     case Primitive::kPrimDouble:
3437       locations->SetInAt(0, Location::RequiresFpuRegister());
3438       locations->SetInAt(1, Location::RequiresFpuRegister());
3439       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3440       break;
3441 
3442     default:
3443       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3444   }
3445 }
3446 
3447 void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
3448   Primitive::Type type = div->GetResultType();
3449   switch (type) {
3450     case Primitive::kPrimInt:
3451     case Primitive::kPrimLong:
3452       GenerateDivRemIntegral(div);
3453       break;
3454 
3455     case Primitive::kPrimFloat:
3456     case Primitive::kPrimDouble:
3457       __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1));
3458       break;
3459 
3460     default:
3461       LOG(FATAL) << "Unexpected div type " << type;
3462   }
3463 }
3464 
3465 void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3466   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3467   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
3468 }
3469 
3470 void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3471   SlowPathCodeARM64* slow_path =
3472       new (GetGraph()->GetArena()) DivZeroCheckSlowPathARM64(instruction);
3473   codegen_->AddSlowPath(slow_path);
3474   Location value = instruction->GetLocations()->InAt(0);
3475 
3476   Primitive::Type type = instruction->GetType();
3477 
3478   if (!Primitive::IsIntegralType(type)) {
3479     LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
3480     return;
3481   }
3482 
3483   if (value.IsConstant()) {
3484     int64_t divisor = Int64ConstantFrom(value);
3485     if (divisor == 0) {
3486       __ B(slow_path->GetEntryLabel());
3487     } else {
3488       // A division by a non-zero constant is valid. We don't need to perform
3489       // any check, so simply fall through.
3490     }
3491   } else {
3492     __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
3493   }
3494 }
3495 
3496 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
3497   LocationSummary* locations =
3498       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
3499   locations->SetOut(Location::ConstantLocation(constant));
3500 }
3501 
3502 void InstructionCodeGeneratorARM64::VisitDoubleConstant(
3503     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
3504   // Will be generated at use site.
3505 }
3506 
3507 void LocationsBuilderARM64::VisitExit(HExit* exit) {
3508   exit->SetLocations(nullptr);
3509 }
3510 
3511 void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
3512 }
3513 
3514 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
3515   LocationSummary* locations =
3516       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
3517   locations->SetOut(Location::ConstantLocation(constant));
3518 }
3519 
3520 void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
3521   // Will be generated at use site.
3522 }
3523 
3524 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
3525   DCHECK(!successor->IsExitBlock());
3526   HBasicBlock* block = got->GetBlock();
3527   HInstruction* previous = got->GetPrevious();
3528   HLoopInformation* info = block->GetLoopInformation();
3529 
3530   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
3531     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
3532     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
3533     return;
3534   }
3535   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
3536     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
3537   }
3538   if (!codegen_->GoesToNextBlock(block, successor)) {
3539     __ B(codegen_->GetLabelOf(successor));
3540   }
3541 }
3542 
3543 void LocationsBuilderARM64::VisitGoto(HGoto* got) {
3544   got->SetLocations(nullptr);
3545 }
3546 
3547 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
3548   HandleGoto(got, got->GetSuccessor());
3549 }
3550 
3551 void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3552   try_boundary->SetLocations(nullptr);
3553 }
3554 
3555 void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3556   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
3557   if (!successor->IsExitBlock()) {
3558     HandleGoto(try_boundary, successor);
3559   }
3560 }
3561 
3562 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
3563                                                           size_t condition_input_index,
3564                                                           vixl::aarch64::Label* true_target,
3565                                                           vixl::aarch64::Label* false_target) {
3566   HInstruction* cond = instruction->InputAt(condition_input_index);
3567 
3568   if (true_target == nullptr && false_target == nullptr) {
3569     // Nothing to do. The code always falls through.
3570     return;
3571   } else if (cond->IsIntConstant()) {
3572     // Constant condition, statically compared against "true" (integer value 1).
3573     if (cond->AsIntConstant()->IsTrue()) {
3574       if (true_target != nullptr) {
3575         __ B(true_target);
3576       }
3577     } else {
3578       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
3579       if (false_target != nullptr) {
3580         __ B(false_target);
3581       }
3582     }
3583     return;
3584   }
3585 
3586   // The following code generates these patterns:
3587   //  (1) true_target == nullptr && false_target != nullptr
3588   //        - opposite condition true => branch to false_target
3589   //  (2) true_target != nullptr && false_target == nullptr
3590   //        - condition true => branch to true_target
3591   //  (3) true_target != nullptr && false_target != nullptr
3592   //        - condition true => branch to true_target
3593   //        - branch to false_target
3594   if (IsBooleanValueOrMaterializedCondition(cond)) {
3595     // The condition instruction has been materialized, compare the output to 0.
3596     Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
3597     DCHECK(cond_val.IsRegister());
3598     if (true_target == nullptr) {
3599       __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
3600     } else {
3601       __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
3602     }
3603   } else {
3604     // The condition instruction has not been materialized, use its inputs as
3605     // the comparison and its condition as the branch condition.
3606     HCondition* condition = cond->AsCondition();
3607 
3608     Primitive::Type type = condition->InputAt(0)->GetType();
3609     if (Primitive::IsFloatingPointType(type)) {
3610       GenerateFcmp(condition);
3611       if (true_target == nullptr) {
3612         IfCondition opposite_condition = condition->GetOppositeCondition();
3613         __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
3614       } else {
3615         __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
3616       }
3617     } else {
3618       // Integer cases.
3619       Register lhs = InputRegisterAt(condition, 0);
3620       Operand rhs = InputOperandAt(condition, 1);
3621 
3622       Condition arm64_cond;
3623       vixl::aarch64::Label* non_fallthrough_target;
3624       if (true_target == nullptr) {
3625         arm64_cond = ARM64Condition(condition->GetOppositeCondition());
3626         non_fallthrough_target = false_target;
3627       } else {
3628         arm64_cond = ARM64Condition(condition->GetCondition());
3629         non_fallthrough_target = true_target;
3630       }
3631 
3632       if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
3633           rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
3634         switch (arm64_cond) {
3635           case eq:
3636             __ Cbz(lhs, non_fallthrough_target);
3637             break;
3638           case ne:
3639             __ Cbnz(lhs, non_fallthrough_target);
3640             break;
3641           case lt:
3642             // Test the sign bit and branch accordingly.
3643             __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3644             break;
3645           case ge:
3646             // Test the sign bit and branch accordingly.
3647             __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3648             break;
3649           default:
3650             // Without the `static_cast` the compiler throws an error for
3651             // `-Werror=sign-promo`.
3652             LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
3653         }
3654       } else {
3655         __ Cmp(lhs, rhs);
3656         __ B(arm64_cond, non_fallthrough_target);
3657       }
3658     }
3659   }
3660 
3661   // If neither branch falls through (case 3), the conditional branch to `true_target`
3662   // was already emitted (case 2) and we need to emit a jump to `false_target`.
3663   if (true_target != nullptr && false_target != nullptr) {
3664     __ B(false_target);
3665   }
3666 }
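// Illustrative example of the compare-against-zero fast path above (register
// names symbolic): for a non-materialized `if (a >= 0)` whose true successor
// is the fall-through block, the whole test reduces to one instruction:
//   tbnz w_a, #31, <false_target>   // opposite condition (lt): branch if sign bit set
// instead of the generic `cmp w_a, #0` followed by `b.lt <false_target>`.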
3667 
3668 void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
3669   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
3670   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3671     locations->SetInAt(0, Location::RequiresRegister());
3672   }
3673 }
3674 
3675 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
3676   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
3677   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
3678   vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
3679   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
3680     true_target = nullptr;
3681   }
3682   vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
3683   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
3684     false_target = nullptr;
3685   }
3686   GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
3687 }
3688 
3689 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3690   LocationSummary* locations = new (GetGraph()->GetArena())
3691       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
3692   InvokeRuntimeCallingConvention calling_convention;
3693   RegisterSet caller_saves = RegisterSet::Empty();
3694   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3695   locations->SetCustomSlowPathCallerSaves(caller_saves);
3696   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
3697     locations->SetInAt(0, Location::RequiresRegister());
3698   }
3699 }
3700 
3701 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3702   SlowPathCodeARM64* slow_path =
3703       deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
3704   GenerateTestAndBranch(deoptimize,
3705                         /* condition_input_index */ 0,
3706                         slow_path->GetEntryLabel(),
3707                         /* false_target */ nullptr);
3708 }
3709 
3710 void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3711   LocationSummary* locations = new (GetGraph()->GetArena())
3712       LocationSummary(flag, LocationSummary::kNoCall);
3713   locations->SetOut(Location::RequiresRegister());
3714 }
3715 
3716 void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3717   __ Ldr(OutputRegister(flag),
3718          MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
3719 }
3720 
3721 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
3722   return condition->IsCondition() &&
3723          Primitive::IsFloatingPointType(condition->InputAt(0)->GetType());
3724 }
3725 
3726 static inline Condition GetConditionForSelect(HCondition* condition) {
3727   IfCondition cond = condition->AsCondition()->GetCondition();
3728   return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
3729                                                      : ARM64Condition(cond);
3730 }
3731 
3732 void LocationsBuilderARM64::VisitSelect(HSelect* select) {
3733   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
3734   if (Primitive::IsFloatingPointType(select->GetType())) {
3735     locations->SetInAt(0, Location::RequiresFpuRegister());
3736     locations->SetInAt(1, Location::RequiresFpuRegister());
3737     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3738   } else {
3739     HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
3740     HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
3741     bool is_true_value_constant = cst_true_value != nullptr;
3742     bool is_false_value_constant = cst_false_value != nullptr;
3743     // Ask VIXL whether we should synthesize constants in registers.
3744     // We give an arbitrary register to VIXL when dealing with non-constant inputs.
3745     Operand true_op = is_true_value_constant ?
3746         Operand(Int64FromConstant(cst_true_value)) : Operand(x1);
3747     Operand false_op = is_false_value_constant ?
3748         Operand(Int64FromConstant(cst_false_value)) : Operand(x2);
3749     bool true_value_in_register = false;
3750     bool false_value_in_register = false;
3751     MacroAssembler::GetCselSynthesisInformation(
3752         x0, true_op, false_op, &true_value_in_register, &false_value_in_register);
3753     true_value_in_register |= !is_true_value_constant;
3754     false_value_in_register |= !is_false_value_constant;
3755 
3756     locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister()
3757                                                  : Location::ConstantLocation(cst_true_value));
3758     locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
3759                                                   : Location::ConstantLocation(cst_false_value));
3760     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3761   }
3762 
3763   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
3764     locations->SetInAt(2, Location::RequiresRegister());
3765   }
3766 }
3767 
3768 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
3769   HInstruction* cond = select->GetCondition();
3770   Condition csel_cond;
3771 
3772   if (IsBooleanValueOrMaterializedCondition(cond)) {
3773     if (cond->IsCondition() && cond->GetNext() == select) {
3774       // Use the condition flags set by the previous instruction.
3775       csel_cond = GetConditionForSelect(cond->AsCondition());
3776     } else {
3777       __ Cmp(InputRegisterAt(select, 2), 0);
3778       csel_cond = ne;
3779     }
3780   } else if (IsConditionOnFloatingPointValues(cond)) {
3781     GenerateFcmp(cond);
3782     csel_cond = GetConditionForSelect(cond->AsCondition());
3783   } else {
3784     __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
3785     csel_cond = GetConditionForSelect(cond->AsCondition());
3786   }
3787 
3788   if (Primitive::IsFloatingPointType(select->GetType())) {
3789     __ Fcsel(OutputFPRegister(select),
3790              InputFPRegisterAt(select, 1),
3791              InputFPRegisterAt(select, 0),
3792              csel_cond);
3793   } else {
3794     __ Csel(OutputRegister(select),
3795             InputOperandAt(select, 1),
3796             InputOperandAt(select, 0),
3797             csel_cond);
3798   }
3799 }
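// Rough illustration of the integral case above (registers symbolic): with a
// materialized condition in w_cond and the true/false values in w_true and
// w_false, the emitted sequence is essentially
//   cmp  w_cond, #0
//   csel w_out, w_true, w_false, ne
// Constant inputs may instead be kept as immediates or synthesized into
// registers, as decided by GetCselSynthesisInformation() in the locations
// builder above.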
3800 
3801 void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
3802   new (GetGraph()->GetArena()) LocationSummary(info);
3803 }
3804 
3805 void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) {
3806   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
3807 }
3808 
3809 void CodeGeneratorARM64::GenerateNop() {
3810   __ Nop();
3811 }
3812 
3813 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3814   HandleFieldGet(instruction, instruction->GetFieldInfo());
3815 }
3816 
3817 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3818   HandleFieldGet(instruction, instruction->GetFieldInfo());
3819 }
3820 
3821 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3822   HandleFieldSet(instruction);
3823 }
3824 
3825 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3826   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
3827 }
3828 
3829 // Temp is used for read barrier.
3830 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
3831   if (kEmitCompilerReadBarrier &&
3832       (kUseBakerReadBarrier ||
3833           type_check_kind == TypeCheckKind::kAbstractClassCheck ||
3834           type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
3835           type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
3836     return 1;
3837   }
3838   return 0;
3839 }
3840 
3841 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
3842 // interface pointer, one for loading the current interface.
3843 // The other checks have one temp for loading the object's class.
3844 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
3845   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
3846     return 3;
3847   }
3848   return 1 + NumberOfInstanceOfTemps(type_check_kind);
3849 }
3850 
3851 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
3852   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
3853   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3854   bool baker_read_barrier_slow_path = false;
3855   switch (type_check_kind) {
3856     case TypeCheckKind::kExactCheck:
3857     case TypeCheckKind::kAbstractClassCheck:
3858     case TypeCheckKind::kClassHierarchyCheck:
3859     case TypeCheckKind::kArrayObjectCheck:
3860       call_kind =
3861           kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
3862       baker_read_barrier_slow_path = kUseBakerReadBarrier;
3863       break;
3864     case TypeCheckKind::kArrayCheck:
3865     case TypeCheckKind::kUnresolvedCheck:
3866     case TypeCheckKind::kInterfaceCheck:
3867       call_kind = LocationSummary::kCallOnSlowPath;
3868       break;
3869   }
3870 
3871   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
3872   if (baker_read_barrier_slow_path) {
3873     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
3874   }
3875   locations->SetInAt(0, Location::RequiresRegister());
3876   locations->SetInAt(1, Location::RequiresRegister());
3877   // The "out" register is used as a temporary, so it overlaps with the inputs.
3878   // Note that TypeCheckSlowPathARM64 uses this register too.
3879   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3880   // Add temps if necessary for read barriers.
3881   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
3882 }
3883 
3884 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
3885   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3886   LocationSummary* locations = instruction->GetLocations();
3887   Location obj_loc = locations->InAt(0);
3888   Register obj = InputRegisterAt(instruction, 0);
3889   Register cls = InputRegisterAt(instruction, 1);
3890   Location out_loc = locations->Out();
3891   Register out = OutputRegister(instruction);
3892   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
3893   DCHECK_LE(num_temps, 1u);
3894   Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
3895   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3896   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
3897   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
3898   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
3899 
3900   vixl::aarch64::Label done, zero;
3901   SlowPathCodeARM64* slow_path = nullptr;
3902 
3903   // Return 0 if `obj` is null.
3904   // Avoid null check if we know `obj` is not null.
3905   if (instruction->MustDoNullCheck()) {
3906     __ Cbz(obj, &zero);
3907   }
3908 
3909   switch (type_check_kind) {
3910     case TypeCheckKind::kExactCheck: {
3911       // /* HeapReference<Class> */ out = obj->klass_
3912       GenerateReferenceLoadTwoRegisters(instruction,
3913                                         out_loc,
3914                                         obj_loc,
3915                                         class_offset,
3916                                         maybe_temp_loc,
3917                                         kCompilerReadBarrierOption);
3918       __ Cmp(out, cls);
3919       __ Cset(out, eq);
3920       if (zero.IsLinked()) {
3921         __ B(&done);
3922       }
3923       break;
3924     }
3925 
3926     case TypeCheckKind::kAbstractClassCheck: {
3927       // /* HeapReference<Class> */ out = obj->klass_
3928       GenerateReferenceLoadTwoRegisters(instruction,
3929                                         out_loc,
3930                                         obj_loc,
3931                                         class_offset,
3932                                         maybe_temp_loc,
3933                                         kCompilerReadBarrierOption);
3934       // If the class is abstract, we eagerly fetch the super class of the
3935       // object to avoid doing a comparison we know will fail.
3936       vixl::aarch64::Label loop, success;
3937       __ Bind(&loop);
3938       // /* HeapReference<Class> */ out = out->super_class_
3939       GenerateReferenceLoadOneRegister(instruction,
3940                                        out_loc,
3941                                        super_offset,
3942                                        maybe_temp_loc,
3943                                        kCompilerReadBarrierOption);
3944       // If `out` is null, we use it for the result, and jump to `done`.
3945       __ Cbz(out, &done);
3946       __ Cmp(out, cls);
3947       __ B(ne, &loop);
3948       __ Mov(out, 1);
3949       if (zero.IsLinked()) {
3950         __ B(&done);
3951       }
3952       break;
3953     }
3954 
3955     case TypeCheckKind::kClassHierarchyCheck: {
3956       // /* HeapReference<Class> */ out = obj->klass_
3957       GenerateReferenceLoadTwoRegisters(instruction,
3958                                         out_loc,
3959                                         obj_loc,
3960                                         class_offset,
3961                                         maybe_temp_loc,
3962                                         kCompilerReadBarrierOption);
3963       // Walk over the class hierarchy to find a match.
3964       vixl::aarch64::Label loop, success;
3965       __ Bind(&loop);
3966       __ Cmp(out, cls);
3967       __ B(eq, &success);
3968       // /* HeapReference<Class> */ out = out->super_class_
3969       GenerateReferenceLoadOneRegister(instruction,
3970                                        out_loc,
3971                                        super_offset,
3972                                        maybe_temp_loc,
3973                                        kCompilerReadBarrierOption);
3974       __ Cbnz(out, &loop);
3975       // If `out` is null, we use it for the result, and jump to `done`.
3976       __ B(&done);
3977       __ Bind(&success);
3978       __ Mov(out, 1);
3979       if (zero.IsLinked()) {
3980         __ B(&done);
3981       }
3982       break;
3983     }
3984 
3985     case TypeCheckKind::kArrayObjectCheck: {
3986       // /* HeapReference<Class> */ out = obj->klass_
3987       GenerateReferenceLoadTwoRegisters(instruction,
3988                                         out_loc,
3989                                         obj_loc,
3990                                         class_offset,
3991                                         maybe_temp_loc,
3992                                         kCompilerReadBarrierOption);
3993       // Do an exact check.
3994       vixl::aarch64::Label exact_check;
3995       __ Cmp(out, cls);
3996       __ B(eq, &exact_check);
3997       // Otherwise, we need to check that the object's class is a non-primitive array.
3998       // /* HeapReference<Class> */ out = out->component_type_
3999       GenerateReferenceLoadOneRegister(instruction,
4000                                        out_loc,
4001                                        component_offset,
4002                                        maybe_temp_loc,
4003                                        kCompilerReadBarrierOption);
4004       // If `out` is null, we use it for the result, and jump to `done`.
4005       __ Cbz(out, &done);
4006       __ Ldrh(out, HeapOperand(out, primitive_offset));
4007       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4008       __ Cbnz(out, &zero);
4009       __ Bind(&exact_check);
4010       __ Mov(out, 1);
4011       __ B(&done);
4012       break;
4013     }
4014 
4015     case TypeCheckKind::kArrayCheck: {
4016       // No read barrier since the slow path will retry upon failure.
4017       // /* HeapReference<Class> */ out = obj->klass_
4018       GenerateReferenceLoadTwoRegisters(instruction,
4019                                         out_loc,
4020                                         obj_loc,
4021                                         class_offset,
4022                                         maybe_temp_loc,
4023                                         kWithoutReadBarrier);
4024       __ Cmp(out, cls);
4025       DCHECK(locations->OnlyCallsOnSlowPath());
4026       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
4027                                                                       /* is_fatal */ false);
4028       codegen_->AddSlowPath(slow_path);
4029       __ B(ne, slow_path->GetEntryLabel());
4030       __ Mov(out, 1);
4031       if (zero.IsLinked()) {
4032         __ B(&done);
4033       }
4034       break;
4035     }
4036 
4037     case TypeCheckKind::kUnresolvedCheck:
4038     case TypeCheckKind::kInterfaceCheck: {
4039       // Note that we indeed only call on slow path, but we always go
4040       // into the slow path for the unresolved and interface check
4041       // cases.
4042       //
4043       // We cannot directly call the InstanceofNonTrivial runtime
4044       // entry point without resorting to a type checking slow path
4045       // here (i.e. by calling InvokeRuntime directly), as it would
4046       // require assigning fixed registers for the inputs of this
4047       // HInstanceOf instruction (following the runtime calling
4048       // convention), which might be cluttered by the potential first
4049       // read barrier emission at the beginning of this method.
4050       //
4051       // TODO: Introduce a new runtime entry point taking the object
4052       // to test (instead of its class) as argument, and let it deal
4053       // with the read barrier issues. This will let us refactor this
4054       // case of the `switch` code as it was previously (with a direct
4055       // call to the runtime not using a type checking slow path).
4056       // This should also be beneficial for the other cases above.
4057       DCHECK(locations->OnlyCallsOnSlowPath());
4058       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
4059                                                                       /* is_fatal */ false);
4060       codegen_->AddSlowPath(slow_path);
4061       __ B(slow_path->GetEntryLabel());
4062       if (zero.IsLinked()) {
4063         __ B(&done);
4064       }
4065       break;
4066     }
4067   }
4068 
4069   if (zero.IsLinked()) {
4070     __ Bind(&zero);
4071     __ Mov(out, 0);
4072   }
4073 
4074   if (done.IsLinked()) {
4075     __ Bind(&done);
4076   }
4077 
4078   if (slow_path != nullptr) {
4079     __ Bind(slow_path->GetExitLabel());
4080   }
4081 }
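// Rough outline of the simplest case above, kExactCheck without read barriers
// (registers and offsets symbolic, for illustration only):
//   cbz  w_obj, zero                     // null input => result 0
//   ldr  w_out, [x_obj, #class_offset]   // out = obj->klass_
//   cmp  w_out, w_cls
//   cset w_out, eq                       // 1 on exact class match, else 0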
4082 
4083 void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
4084   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
4085   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
4086 
4087   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4088   switch (type_check_kind) {
4089     case TypeCheckKind::kExactCheck:
4090     case TypeCheckKind::kAbstractClassCheck:
4091     case TypeCheckKind::kClassHierarchyCheck:
4092     case TypeCheckKind::kArrayObjectCheck:
4093       call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
4094           LocationSummary::kCallOnSlowPath :
4095           LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
4096       break;
4097     case TypeCheckKind::kArrayCheck:
4098     case TypeCheckKind::kUnresolvedCheck:
4099     case TypeCheckKind::kInterfaceCheck:
4100       call_kind = LocationSummary::kCallOnSlowPath;
4101       break;
4102   }
4103 
4104   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
4105   locations->SetInAt(0, Location::RequiresRegister());
4106   locations->SetInAt(1, Location::RequiresRegister());
4107   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
4108   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
4109 }
4110 
4111 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
4112   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4113   LocationSummary* locations = instruction->GetLocations();
4114   Location obj_loc = locations->InAt(0);
4115   Register obj = InputRegisterAt(instruction, 0);
4116   Register cls = InputRegisterAt(instruction, 1);
4117   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
4118   DCHECK_GE(num_temps, 1u);
4119   DCHECK_LE(num_temps, 3u);
4120   Location temp_loc = locations->GetTemp(0);
4121   Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
4122   Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
4123   Register temp = WRegisterFrom(temp_loc);
4124   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4125   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4126   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4127   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
4128   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
4129   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
4130   const uint32_t object_array_data_offset =
4131       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
4132 
4133   bool is_type_check_slow_path_fatal = false;
4134   // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases
4135   // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding
4136   // read barriers is done for performance and code size reasons.
4137   if (!kEmitCompilerReadBarrier) {
4138     is_type_check_slow_path_fatal =
4139         (type_check_kind == TypeCheckKind::kExactCheck ||
4140          type_check_kind == TypeCheckKind::kAbstractClassCheck ||
4141          type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
4142          type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
4143         !instruction->CanThrowIntoCatchBlock();
4144   }
4145   SlowPathCodeARM64* type_check_slow_path =
4146       new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
4147                                                           is_type_check_slow_path_fatal);
4148   codegen_->AddSlowPath(type_check_slow_path);
4149 
4150   vixl::aarch64::Label done;
4151   // Avoid null check if we know obj is not null.
4152   if (instruction->MustDoNullCheck()) {
4153     __ Cbz(obj, &done);
4154   }
4155 
4156   switch (type_check_kind) {
4157     case TypeCheckKind::kExactCheck:
4158     case TypeCheckKind::kArrayCheck: {
4159       // /* HeapReference<Class> */ temp = obj->klass_
4160       GenerateReferenceLoadTwoRegisters(instruction,
4161                                         temp_loc,
4162                                         obj_loc,
4163                                         class_offset,
4164                                         maybe_temp2_loc,
4165                                         kWithoutReadBarrier);
4166 
4167       __ Cmp(temp, cls);
4168       // Jump to slow path for throwing the exception or doing a
4169       // more involved array check.
4170       __ B(ne, type_check_slow_path->GetEntryLabel());
4171       break;
4172     }
4173 
4174     case TypeCheckKind::kAbstractClassCheck: {
4175       // /* HeapReference<Class> */ temp = obj->klass_
4176       GenerateReferenceLoadTwoRegisters(instruction,
4177                                         temp_loc,
4178                                         obj_loc,
4179                                         class_offset,
4180                                         maybe_temp2_loc,
4181                                         kWithoutReadBarrier);
4182 
4183       // If the class is abstract, we eagerly fetch the super class of the
4184       // object to avoid doing a comparison we know will fail.
4185       vixl::aarch64::Label loop;
4186       __ Bind(&loop);
4187       // /* HeapReference<Class> */ temp = temp->super_class_
4188       GenerateReferenceLoadOneRegister(instruction,
4189                                        temp_loc,
4190                                        super_offset,
4191                                        maybe_temp2_loc,
4192                                        kWithoutReadBarrier);
4193 
4194       // If the class reference currently in `temp` is null, jump to the slow path to throw the
4195       // exception.
4196       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4197       // Otherwise, compare classes.
4198       __ Cmp(temp, cls);
4199       __ B(ne, &loop);
4200       break;
4201     }
4202 
4203     case TypeCheckKind::kClassHierarchyCheck: {
4204       // /* HeapReference<Class> */ temp = obj->klass_
4205       GenerateReferenceLoadTwoRegisters(instruction,
4206                                         temp_loc,
4207                                         obj_loc,
4208                                         class_offset,
4209                                         maybe_temp2_loc,
4210                                         kWithoutReadBarrier);
4211 
4212       // Walk over the class hierarchy to find a match.
4213       vixl::aarch64::Label loop;
4214       __ Bind(&loop);
4215       __ Cmp(temp, cls);
4216       __ B(eq, &done);
4217 
4218       // /* HeapReference<Class> */ temp = temp->super_class_
4219       GenerateReferenceLoadOneRegister(instruction,
4220                                        temp_loc,
4221                                        super_offset,
4222                                        maybe_temp2_loc,
4223                                        kWithoutReadBarrier);
4224 
4225       // If the class reference currently in `temp` is not null, jump
4226       // back to the beginning of the loop.
4227       __ Cbnz(temp, &loop);
4228       // Otherwise, jump to the slow path to throw the exception.
4229       __ B(type_check_slow_path->GetEntryLabel());
4230       break;
4231     }
4232 
4233     case TypeCheckKind::kArrayObjectCheck: {
4234       // /* HeapReference<Class> */ temp = obj->klass_
4235       GenerateReferenceLoadTwoRegisters(instruction,
4236                                         temp_loc,
4237                                         obj_loc,
4238                                         class_offset,
4239                                         maybe_temp2_loc,
4240                                         kWithoutReadBarrier);
4241 
4242       // Do an exact check.
4243       __ Cmp(temp, cls);
4244       __ B(eq, &done);
4245 
4246       // Otherwise, we need to check that the object's class is a non-primitive array.
4247       // /* HeapReference<Class> */ temp = temp->component_type_
4248       GenerateReferenceLoadOneRegister(instruction,
4249                                        temp_loc,
4250                                        component_offset,
4251                                        maybe_temp2_loc,
4252                                        kWithoutReadBarrier);
4253 
4254       // If the component type is null, jump to the slow path to throw the exception.
4255       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4256       // Otherwise, the object is indeed an array. Further check that this component type is not a
4257       // primitive type.
4258       __ Ldrh(temp, HeapOperand(temp, primitive_offset));
4259       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4260       __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
4261       break;
4262     }
4263 
4264     case TypeCheckKind::kUnresolvedCheck:
4265       // We always go into the type check slow path for the unresolved check cases.
4266       //
4267       // We cannot directly call the CheckCast runtime entry point
4268       // without resorting to a type checking slow path here (i.e. by
4269       // calling InvokeRuntime directly), as it would require
4270       // assigning fixed registers for the inputs of this HCheckCast
4271       // instruction (following the runtime calling convention), which
4272       // might be cluttered by the potential first read barrier
4273       // emission at the beginning of this method.
4274       __ B(type_check_slow_path->GetEntryLabel());
4275       break;
4276     case TypeCheckKind::kInterfaceCheck: {
4277       // /* HeapReference<Class> */ temp = obj->klass_
4278       GenerateReferenceLoadTwoRegisters(instruction,
4279                                         temp_loc,
4280                                         obj_loc,
4281                                         class_offset,
4282                                         maybe_temp2_loc,
4283                                         kWithoutReadBarrier);
4284 
4285       // /* HeapReference<Class> */ temp = temp->iftable_
4286       GenerateReferenceLoadTwoRegisters(instruction,
4287                                         temp_loc,
4288                                         temp_loc,
4289                                         iftable_offset,
4290                                         maybe_temp2_loc,
4291                                         kWithoutReadBarrier);
4292       // Iftable is never null.
4293       __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
4294       // Loop through the iftable and check if any class matches.
4295       vixl::aarch64::Label start_loop;
4296       __ Bind(&start_loop);
4297       __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel());
4298       __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset));
4299       GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc));
4300       // Go to next interface.
4301       __ Add(temp, temp, 2 * kHeapReferenceSize);
4302       __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2);
4303       // Compare the classes and continue the loop if they do not match.
4304       __ Cmp(cls, WRegisterFrom(maybe_temp3_loc));
4305       __ B(ne, &start_loop);
4306       break;
4307     }
4308   }
4309   __ Bind(&done);
4310 
4311   __ Bind(type_check_slow_path->GetExitLabel());
4312 }
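// The kInterfaceCheck case above walks the IfTable, which stores pairs of
// (interface class, method array); hence the stride of 2 * kHeapReferenceSize
// and the decrement of the remaining-length counter by 2. Rough outline of the
// emitted loop (registers and offsets symbolic, for illustration only):
//   start_loop:
//     cbz  w_len, slow_path                       // no entries left => throw
//     ldr  w_iface, [x_temp, #data_offset]
//     add  x_temp, x_temp, #(2 * kHeapReferenceSize)
//     sub  w_len, w_len, #2
//     cmp  w_cls, w_iface
//     b.ne start_loop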
4313 
4314 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
4315   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
4316   locations->SetOut(Location::ConstantLocation(constant));
4317 }
4318 
4319 void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
4320   // Will be generated at use site.
4321 }
4322 
4323 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
4324   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
4325   locations->SetOut(Location::ConstantLocation(constant));
4326 }
4327 
4328 void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
4329   // Will be generated at use site.
4330 }
4331 
4332 void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4333   // The trampoline uses the same calling convention as dex calling conventions,
4334   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
4335   // the method_idx.
4336   HandleInvoke(invoke);
4337 }
4338 
4339 void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4340   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
4341 }
4342 
4343 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
4344   InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
4345   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
4346 }
4347 
4348 void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4349   HandleInvoke(invoke);
4350 }
4351 
4352 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4353   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
4354   LocationSummary* locations = invoke->GetLocations();
4355   Register temp = XRegisterFrom(locations->GetTemp(0));
4356   Location receiver = locations->InAt(0);
4357   Offset class_offset = mirror::Object::ClassOffset();
4358   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4359 
4360   // The register ip1 is required to be used for the hidden argument in
4361   // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
4362   MacroAssembler* masm = GetVIXLAssembler();
4363   UseScratchRegisterScope scratch_scope(masm);
4364   scratch_scope.Exclude(ip1);
4365   __ Mov(ip1, invoke->GetDexMethodIndex());
4366 
4367   // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4368   if (receiver.IsStackSlot()) {
4369     __ Ldr(temp.W(), StackOperandFrom(receiver));
4370     {
4371       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4372       // /* HeapReference<Class> */ temp = temp->klass_
4373       __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
4374       codegen_->MaybeRecordImplicitNullCheck(invoke);
4375     }
4376   } else {
4377     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4378     // /* HeapReference<Class> */ temp = receiver->klass_
4379     __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
4380     codegen_->MaybeRecordImplicitNullCheck(invoke);
4381   }
4382 
4383   // Instead of simply (possibly) unpoisoning `temp` here, we should
4384   // emit a read barrier for the previous class reference load.
4385   // However this is not required in practice, as this is an
4386   // intermediate/temporary reference and because the current
4387   // concurrent copying collector keeps the from-space memory
4388   // intact/accessible until the end of the marking phase (the
4389   // concurrent copying collector may not in the future).
4390   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4391   __ Ldr(temp,
4392       MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
4393   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4394       invoke->GetImtIndex(), kArm64PointerSize));
4395   // temp = temp->GetImtEntryAt(method_offset);
4396   __ Ldr(temp, MemOperand(temp, method_offset));
4397   // lr = temp->GetEntryPoint();
4398   __ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));
4399 
4400   {
4401     // Ensure the pc position is recorded immediately after the `blr` instruction.
4402     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4403 
4404     // lr();
4405     __ blr(lr);
4406     DCHECK(!codegen_->IsLeafMethod());
4407     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
4408   }
4409 }
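// Condensed view of the interface dispatch emitted above (registers and
// offsets symbolic, for illustration only):
//   mov  ip1, #dex_method_index              // hidden argument for the
//                                            // IMT conflict trampoline
//   ldr  w_temp, [x_receiver, #class_offset]
//   ldr  x_temp, [x_temp, #imt_ptr_offset]
//   ldr  x_temp, [x_temp, #imt_entry_offset]
//   ldr  lr, [x_temp, #entry_point_offset]
//   blr  lr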
4410 
4411 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4412   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
4413   if (intrinsic.TryDispatch(invoke)) {
4414     return;
4415   }
4416 
4417   HandleInvoke(invoke);
4418 }
4419 
4420 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4421   // Explicit clinit checks triggered by static invokes must have been pruned by
4422   // art::PrepareForRegisterAllocation.
4423   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4424 
4425   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
4426   if (intrinsic.TryDispatch(invoke)) {
4427     return;
4428   }
4429 
4430   HandleInvoke(invoke);
4431 }
4432 
4433 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4434   if (invoke->GetLocations()->Intrinsified()) {
4435     IntrinsicCodeGeneratorARM64 intrinsic(codegen);
4436     intrinsic.Dispatch(invoke);
4437     return true;
4438   }
4439   return false;
4440 }
4441 
4442 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
4443       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
4444       HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
4445   // On ARM64 we support all dispatch types.
4446   return desired_dispatch_info;
4447 }
4448 
4449 Location CodeGeneratorARM64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
4450                                                                     Location temp) {
4451   // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
4452   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
4453   switch (invoke->GetMethodLoadKind()) {
4454     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
4455       uint32_t offset =
4456           GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
4457       // temp = thread->string_init_entrypoint
4458       __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
4459       break;
4460     }
4461     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
4462       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4463       break;
4464     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
4465       // Load method address from literal pool.
4466       __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
4467       break;
4468     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
4469       // Add ADRP with its PC-relative DexCache access patch.
4470       const DexFile& dex_file = invoke->GetDexFileForPcRelativeDexCache();
4471       uint32_t element_offset = invoke->GetDexCacheArrayOffset();
4472       vixl::aarch64::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
4473       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4474       // Add LDR with its PC-relative DexCache access patch.
4475       vixl::aarch64::Label* ldr_label =
4476           NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
4477       EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
4478       break;
4479     }
4480     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
4481       Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4482       Register reg = XRegisterFrom(temp);
4483       Register method_reg;
4484       if (current_method.IsRegister()) {
4485         method_reg = XRegisterFrom(current_method);
4486       } else {
4487         DCHECK(invoke->GetLocations()->Intrinsified());
4488         DCHECK(!current_method.IsValid());
4489         method_reg = reg;
4490         __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset));
4491       }
4492 
4493       // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
4494       __ Ldr(reg.X(),
4495              MemOperand(method_reg.X(),
4496                         ArtMethod::DexCacheResolvedMethodsOffset(kArm64PointerSize).Int32Value()));
4497       // temp = temp[index_in_cache];
4498       // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
4499       uint32_t index_in_cache = invoke->GetDexMethodIndex();
4500       __ Ldr(reg.X(), MemOperand(reg.X(), GetCachePointerOffset(index_in_cache)));
4501       break;
4502     }
4503   }
4504   return callee_method;
4505 }
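// For kDexCachePcRelative above, the two placeholders are patched at link time
// into a standard ADRP/LDR pair (symbolic operands, for illustration only):
//   adrp x_temp, <dex_cache_array_page>
//   ldr  x_temp, [x_temp, #<offset_within_page>]
// The label returned for the ADRP is passed to the second
// NewPcRelativeDexCacheArrayPatch() call so that both patches share the same
// anchor PC.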
4506 
4507 void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
4508   // All registers are assumed to be correctly set up.
4509   Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
4510 
4511   switch (invoke->GetCodePtrLocation()) {
4512     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
4513       __ Bl(&frame_entry_label_);
4514       break;
4515     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
4516       // LR = callee_method->entry_point_from_quick_compiled_code_;
4517       __ Ldr(lr, MemOperand(
4518           XRegisterFrom(callee_method),
4519           ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
4520       {
4521         // To ensure that the pc position is recorded immediately after the `blr` instruction
4522         // BLR must be the last instruction emitted in this function.
4523         // Recording the pc will occur right after returning from this function.
4524         ExactAssemblyScope eas(GetVIXLAssembler(),
4525                                kInstructionSize,
4526                                CodeBufferCheckScope::kExactSize);
4527         // lr()
4528         __ blr(lr);
4529       }
4530       break;
4531   }
4532 
4533   DCHECK(!IsLeafMethod());
4534 }
4535 
4536 void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
4537   // Use the calling convention instead of the location of the receiver, as
4538   // intrinsics may have put the receiver in a different register. In the intrinsics
4539   // slow path, the arguments have been moved to the right place, so here we are
4540   // guaranteed that the receiver is the first register of the calling convention.
4541   InvokeDexCallingConvention calling_convention;
4542   Register receiver = calling_convention.GetRegisterAt(0);
4543   Register temp = XRegisterFrom(temp_in);
4544   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4545       invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
4546   Offset class_offset = mirror::Object::ClassOffset();
4547   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4548 
4549   DCHECK(receiver.IsRegister());
4550 
4551   {
4552     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4553     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4554     // /* HeapReference<Class> */ temp = receiver->klass_
4555     __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
4556     MaybeRecordImplicitNullCheck(invoke);
4557   }
4558   // Instead of simply (possibly) unpoisoning `temp` here, we should
4559   // emit a read barrier for the previous class reference load.
4560   // However this is not required in practice, as this is an intermediate/temporary reference and because the current
4561   // concurrent copying collector keeps the from-space memory
4562   // intact/accessible until the end of the marking phase (the
4563   // concurrent copying collector may not in the future).
4564   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4565   // temp = temp->GetMethodAt(method_offset);
4566   __ Ldr(temp, MemOperand(temp, method_offset));
4567   // lr = temp->GetEntryPoint();
4568   __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
4569   {
4570     // To ensure that the pc position is recorded immediately after the `blr` instruction
4571     // BLR should be the last instruction emitted in this function.
4572     // Recording the pc will occur right after returning from this function.
4573     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4574     // lr();
4575     __ blr(lr);
4576   }
4577 }
4578 
4579 void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4580   HandleInvoke(invoke);
4581 }
4582 
4583 void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4584   codegen_->GenerateInvokePolymorphicCall(invoke);
4585 }
4586 
4587 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
4588     const DexFile& dex_file,
4589     dex::StringIndex string_index,
4590     vixl::aarch64::Label* adrp_label) {
4591   return
4592       NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_);
4593 }
4594 
4595 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(
4596     const DexFile& dex_file,
4597     dex::TypeIndex type_index,
4598     vixl::aarch64::Label* adrp_label) {
4599   return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &pc_relative_type_patches_);
4600 }
4601 
4602 vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
4603     const DexFile& dex_file,
4604     dex::TypeIndex type_index,
4605     vixl::aarch64::Label* adrp_label) {
4606   return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_);
4607 }
4608 
4609 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(
4610     const DexFile& dex_file,
4611     uint32_t element_offset,
4612     vixl::aarch64::Label* adrp_label) {
4613   return NewPcRelativePatch(dex_file, element_offset, adrp_label, &pc_relative_dex_cache_patches_);
4614 }
4615 
4616 vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) {
4617   baker_read_barrier_patches_.emplace_back(custom_data);
4618   return &baker_read_barrier_patches_.back().label;
4619 }
4620 
4621 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
4622     const DexFile& dex_file,
4623     uint32_t offset_or_index,
4624     vixl::aarch64::Label* adrp_label,
4625     ArenaDeque<PcRelativePatchInfo>* patches) {
4626   // Add a patch entry and return the label.
4627   patches->emplace_back(dex_file, offset_or_index);
4628   PcRelativePatchInfo* info = &patches->back();
4629   vixl::aarch64::Label* label = &info->label;
4630   // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
4631   info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
4632   return label;
4633 }
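// Callers pair these patches as follows (see VisitLoadClass/VisitLoadString below): the
// first call (adrp_label == nullptr) labels the ADRP itself, and its returned label is
// then passed back as `adrp_label` for the companion ADD/LDR patch, so both resulting
// LinkerPatches report the same pc_insn_label and the linker can fix up the pair together.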
4634 
4635 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageStringLiteral(
4636     const DexFile& dex_file, dex::StringIndex string_index) {
4637   return boot_image_string_patches_.GetOrCreate(
4638       StringReference(&dex_file, string_index),
4639       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
4640 }
4641 
4642 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageTypeLiteral(
4643     const DexFile& dex_file, dex::TypeIndex type_index) {
4644   return boot_image_type_patches_.GetOrCreate(
4645       TypeReference(&dex_file, type_index),
4646       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
4647 }
4648 
4649 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
4650     uint64_t address) {
4651   return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
4652 }
4653 
4654 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
4655     const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) {
4656   jit_string_roots_.Overwrite(StringReference(&dex_file, string_index),
4657                               reinterpret_cast64<uint64_t>(handle.GetReference()));
4658   return jit_string_patches_.GetOrCreate(
4659       StringReference(&dex_file, string_index),
4660       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
4661 }
4662 
4663 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral(
4664     const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) {
4665   jit_class_roots_.Overwrite(TypeReference(&dex_file, type_index),
4666                              reinterpret_cast64<uint64_t>(handle.GetReference()));
4667   return jit_class_patches_.GetOrCreate(
4668       TypeReference(&dex_file, type_index),
4669       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
4670 }
4671 
4672 void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
4673                                              vixl::aarch64::Register reg) {
4674   DCHECK(reg.IsX());
4675   SingleEmissionCheckScope guard(GetVIXLAssembler());
4676   __ Bind(fixup_label);
4677   __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0));
4678 }
4679 
4680 void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
4681                                             vixl::aarch64::Register out,
4682                                             vixl::aarch64::Register base) {
4683   DCHECK(out.IsX());
4684   DCHECK(base.IsX());
4685   SingleEmissionCheckScope guard(GetVIXLAssembler());
4686   __ Bind(fixup_label);
4687   __ add(out, base, Operand(/* offset placeholder */ 0));
4688 }
4689 
4690 void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
4691                                                   vixl::aarch64::Register out,
4692                                                   vixl::aarch64::Register base) {
4693   DCHECK(base.IsX());
4694   SingleEmissionCheckScope guard(GetVIXLAssembler());
4695   __ Bind(fixup_label);
4696   __ ldr(out, MemOperand(base, /* offset placeholder */ 0));
4697 }
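// Illustrative use of the placeholder emitters above, mirroring the pattern in
// VisitLoadClass/VisitLoadString below (offsets stay 0 here and are fixed up by the
// linker; this is a sketch, not code emitted at this point):
//   vixl::aarch64::Label* adrp_label = NewPcRelativeStringPatch(dex_file, string_index);
//   EmitAdrpPlaceholder(adrp_label, out.X());                        // adrp out, #0
//   vixl::aarch64::Label* add_label =
//       NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
//   EmitAddPlaceholder(add_label, out.X(), out.X());                 // add out, out, #0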
4698 
4699 template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
4700 inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
4701     const ArenaDeque<PcRelativePatchInfo>& infos,
4702     ArenaVector<LinkerPatch>* linker_patches) {
4703   for (const PcRelativePatchInfo& info : infos) {
4704     linker_patches->push_back(Factory(info.label.GetLocation(),
4705                                       &info.target_dex_file,
4706                                       info.pc_insn_label->GetLocation(),
4707                                       info.offset_or_index));
4708   }
4709 }
4710 
4711 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
4712   DCHECK(linker_patches->empty());
4713   size_t size =
4714       pc_relative_dex_cache_patches_.size() +
4715       boot_image_string_patches_.size() +
4716       pc_relative_string_patches_.size() +
4717       boot_image_type_patches_.size() +
4718       pc_relative_type_patches_.size() +
4719       type_bss_entry_patches_.size() +
4720       baker_read_barrier_patches_.size();
4721   linker_patches->reserve(size);
4722   for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
4723     linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(),
4724                                                               &info.target_dex_file,
4725                                                               info.pc_insn_label->GetLocation(),
4726                                                               info.offset_or_index));
4727   }
4728   for (const auto& entry : boot_image_string_patches_) {
4729     const StringReference& target_string = entry.first;
4730     vixl::aarch64::Literal<uint32_t>* literal = entry.second;
4731     linker_patches->push_back(LinkerPatch::StringPatch(literal->GetOffset(),
4732                                                        target_string.dex_file,
4733                                                        target_string.string_index.index_));
4734   }
4735   if (!GetCompilerOptions().IsBootImage()) {
4736     DCHECK(pc_relative_type_patches_.empty());
4737     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
4738                                                                   linker_patches);
4739   } else {
4740     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
4741                                                                 linker_patches);
4742     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
4743                                                                   linker_patches);
4744   }
4745   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
4746                                                               linker_patches);
4747   for (const auto& entry : boot_image_type_patches_) {
4748     const TypeReference& target_type = entry.first;
4749     vixl::aarch64::Literal<uint32_t>* literal = entry.second;
4750     linker_patches->push_back(LinkerPatch::TypePatch(literal->GetOffset(),
4751                                                      target_type.dex_file,
4752                                                      target_type.type_index.index_));
4753   }
4754   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
4755     linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
4756                                                                        info.custom_data));
4757   }
4758   DCHECK_EQ(size, linker_patches->size());
4759 }
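// The reserve()/DCHECK_EQ pair above brackets the emission: `size` is computed from the
// per-kind containers before any patch is pushed, so the vector never reallocates, and
// the final check verifies that exactly the reserved number of patches was emitted.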
4760 
4761 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value,
4762                                                                       Uint32ToLiteralMap* map) {
4763   return map->GetOrCreate(
4764       value,
4765       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
4766 }
4767 
4768 vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
4769   return uint64_literals_.GetOrCreate(
4770       value,
4771       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
4772 }
4773 
4774 vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral(
4775     MethodReference target_method,
4776     MethodToLiteralMap* map) {
4777   return map->GetOrCreate(
4778       target_method,
4779       [this]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(/* placeholder */ 0u); });
4780 }
4781 
4782 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4783   // Explicit clinit checks triggered by static invokes must have been pruned by
4784   // art::PrepareForRegisterAllocation.
4785   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4786 
4787   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4788     return;
4789   }
4790 
4791   // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
4792   // are no pools emitted.
4793   EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
4794   LocationSummary* locations = invoke->GetLocations();
4795   codegen_->GenerateStaticOrDirectCall(
4796       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
4797   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
4798 }
4799 
4800 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4801   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4802     return;
4803   }
4804 
4805   // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
4806   // are no pools emitted.
4807   EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
4808   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
4809   DCHECK(!codegen_->IsLeafMethod());
4810   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
4811 }
4812 
4813 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
4814     HLoadClass::LoadKind desired_class_load_kind) {
4815   switch (desired_class_load_kind) {
4816     case HLoadClass::LoadKind::kInvalid:
4817       LOG(FATAL) << "UNREACHABLE";
4818       UNREACHABLE();
4819     case HLoadClass::LoadKind::kReferrersClass:
4820       break;
4821     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
4822       DCHECK(!GetCompilerOptions().GetCompilePic());
4823       break;
4824     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
4825       DCHECK(GetCompilerOptions().GetCompilePic());
4826       break;
4827     case HLoadClass::LoadKind::kBootImageAddress:
4828       break;
4829     case HLoadClass::LoadKind::kBssEntry:
4830       DCHECK(!Runtime::Current()->UseJitCompilation());
4831       break;
4832     case HLoadClass::LoadKind::kJitTableAddress:
4833       DCHECK(Runtime::Current()->UseJitCompilation());
4834       break;
4835     case HLoadClass::LoadKind::kDexCacheViaMethod:
4836       break;
4837   }
4838   return desired_class_load_kind;
4839 }
4840 
4841 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
4842   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
4843   if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
4844     InvokeRuntimeCallingConvention calling_convention;
4845     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
4846         cls,
4847         LocationFrom(calling_convention.GetRegisterAt(0)),
4848         LocationFrom(vixl::aarch64::x0));
4849     DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0));
4850     return;
4851   }
4852   DCHECK(!cls->NeedsAccessCheck());
4853 
4854   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
4855   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
4856       ? LocationSummary::kCallOnSlowPath
4857       : LocationSummary::kNoCall;
4858   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
4859   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
4860     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
4861   }
4862 
4863   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
4864     locations->SetInAt(0, Location::RequiresRegister());
4865   }
4866   locations->SetOut(Location::RequiresRegister());
4867   if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
4868     if (!kUseReadBarrier || kUseBakerReadBarrier) {
4869       // Rely on the type resolution or initialization and marking to save everything we need.
4870       locations->AddTemp(FixedTempLocation());
4871       RegisterSet caller_saves = RegisterSet::Empty();
4872       InvokeRuntimeCallingConvention calling_convention;
4873       caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
4874       DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
4875                 RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot),
4876                              Primitive::kPrimNot).GetCode());
4877       locations->SetCustomSlowPathCallerSaves(caller_saves);
4878     } else {
4879       // For non-Baker read barrier we have a temp-clobbering call.
4880     }
4881   }
4882 }
4883 
4884 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
4885 // move.
4886 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
4887   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
4888   if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
4889     codegen_->GenerateLoadClassRuntimeCall(cls);
4890     return;
4891   }
4892   DCHECK(!cls->NeedsAccessCheck());
4893 
4894   Location out_loc = cls->GetLocations()->Out();
4895   Register out = OutputRegister(cls);
4896   Register bss_entry_temp;
4897   vixl::aarch64::Label* bss_entry_adrp_label = nullptr;
4898 
4899   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
4900       ? kWithoutReadBarrier
4901       : kCompilerReadBarrierOption;
4902   bool generate_null_check = false;
4903   switch (load_kind) {
4904     case HLoadClass::LoadKind::kReferrersClass: {
4905       DCHECK(!cls->CanCallRuntime());
4906       DCHECK(!cls->MustGenerateClinitCheck());
4907       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
4908       Register current_method = InputRegisterAt(cls, 0);
4909       GenerateGcRootFieldLoad(cls,
4910                               out_loc,
4911                               current_method,
4912                               ArtMethod::DeclaringClassOffset().Int32Value(),
4913                               /* fixup_label */ nullptr,
4914                               read_barrier_option);
4915       break;
4916     }
4917     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
4918       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
4919       __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
4920                                                             cls->GetTypeIndex()));
4921       break;
4922     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
4923       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
4924       // Add ADRP with its PC-relative type patch.
4925       const DexFile& dex_file = cls->GetDexFile();
4926       dex::TypeIndex type_index = cls->GetTypeIndex();
4927       vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index);
4928       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
4929       // Add ADD with its PC-relative type patch.
4930       vixl::aarch64::Label* add_label =
4931           codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label);
4932       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
4933       break;
4934     }
4935     case HLoadClass::LoadKind::kBootImageAddress: {
4936       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
4937       uint32_t address = dchecked_integral_cast<uint32_t>(
4938           reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
4939       DCHECK_NE(address, 0u);
4940       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
4941       break;
4942     }
4943     case HLoadClass::LoadKind::kBssEntry: {
4944       // Add ADRP with its PC-relative Class .bss entry patch.
4945       const DexFile& dex_file = cls->GetDexFile();
4946       dex::TypeIndex type_index = cls->GetTypeIndex();
4947       bss_entry_temp = XRegisterFrom(cls->GetLocations()->GetTemp(0));
4948       bss_entry_adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
4949       codegen_->EmitAdrpPlaceholder(bss_entry_adrp_label, bss_entry_temp);
4950       // Add LDR with its PC-relative Class patch.
4951       vixl::aarch64::Label* ldr_label =
4952           codegen_->NewBssEntryTypePatch(dex_file, type_index, bss_entry_adrp_label);
4953       // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
4954       GenerateGcRootFieldLoad(cls,
4955                               out_loc,
4956                               bss_entry_temp,
4957                               /* offset placeholder */ 0u,
4958                               ldr_label,
4959                               read_barrier_option);
4960       generate_null_check = true;
4961       break;
4962     }
4963     case HLoadClass::LoadKind::kJitTableAddress: {
4964       __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
4965                                                        cls->GetTypeIndex(),
4966                                                        cls->GetClass()));
4967       GenerateGcRootFieldLoad(cls,
4968                               out_loc,
4969                               out.X(),
4970                               /* offset */ 0,
4971                               /* fixup_label */ nullptr,
4972                               read_barrier_option);
4973       break;
4974     }
4975     case HLoadClass::LoadKind::kDexCacheViaMethod:
4976     case HLoadClass::LoadKind::kInvalid:
4977       LOG(FATAL) << "UNREACHABLE";
4978       UNREACHABLE();
4979   }
4980 
4981   bool do_clinit = cls->MustGenerateClinitCheck();
4982   if (generate_null_check || do_clinit) {
4983     DCHECK(cls->CanCallRuntime());
4984     SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
4985         cls, cls, cls->GetDexPc(), do_clinit, bss_entry_temp, bss_entry_adrp_label);
4986     codegen_->AddSlowPath(slow_path);
4987     if (generate_null_check) {
4988       __ Cbz(out, slow_path->GetEntryLabel());
4989     }
4990     if (cls->MustGenerateClinitCheck()) {
4991       GenerateClassInitializationCheck(slow_path, out);
4992     } else {
4993       __ Bind(slow_path->GetExitLabel());
4994     }
4995   }
4996 }
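// For the kBssEntry case above, the generated fast path is conceptually:
//   adrp xTmp, <page of the Class .bss slot>     // patched ADRP
//   ldr  wOut, [xTmp, #<page offset of slot>]    // GC-root load (read barrier if enabled)
//   cbz  wOut, <LoadClassSlowPathARM64>          // unresolved entry is still null
// The slow path resolves (and, if required, initializes) the class and the slot is filled,
// so later executions fall through the cbz. (Sketch only; the exact code depends on the
// read barrier configuration.)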
4997 
4998 static MemOperand GetExceptionTlsAddress() {
4999   return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
5000 }
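// `tr` is the reserved thread register (x19 in this backend), so the pending exception
// is read and cleared with a single Ldr/Str against the thread-local exception slot,
// as the two visitors below do.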
5001 
5002 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
5003   LocationSummary* locations =
5004       new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
5005   locations->SetOut(Location::RequiresRegister());
5006 }
5007 
5008 void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) {
5009   __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress());
5010 }
5011 
5012 void LocationsBuilderARM64::VisitClearException(HClearException* clear) {
5013   new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
5014 }
5015 
5016 void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
5017   __ Str(wzr, GetExceptionTlsAddress());
5018 }
5019 
5020 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
5021     HLoadString::LoadKind desired_string_load_kind) {
5022   switch (desired_string_load_kind) {
5023     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5024       DCHECK(!GetCompilerOptions().GetCompilePic());
5025       break;
5026     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5027       DCHECK(GetCompilerOptions().GetCompilePic());
5028       break;
5029     case HLoadString::LoadKind::kBootImageAddress:
5030       break;
5031     case HLoadString::LoadKind::kBssEntry:
5032       DCHECK(!Runtime::Current()->UseJitCompilation());
5033       break;
5034     case HLoadString::LoadKind::kJitTableAddress:
5035       DCHECK(Runtime::Current()->UseJitCompilation());
5036       break;
5037     case HLoadString::LoadKind::kDexCacheViaMethod:
5038       break;
5039   }
5040   return desired_string_load_kind;
5041 }
5042 
5043 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
5044   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
5045   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
5046   if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
5047     InvokeRuntimeCallingConvention calling_convention;
5048     locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
5049   } else {
5050     locations->SetOut(Location::RequiresRegister());
5051     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
5052       if (!kUseReadBarrier || kUseBakerReadBarrier) {
5053         // Rely on the pResolveString and marking to save everything we need.
5054         locations->AddTemp(FixedTempLocation());
5055         RegisterSet caller_saves = RegisterSet::Empty();
5056         InvokeRuntimeCallingConvention calling_convention;
5057         caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
5058         DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
5059                   RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot),
5060                                Primitive::kPrimNot).GetCode());
5061         locations->SetCustomSlowPathCallerSaves(caller_saves);
5062       } else {
5063         // For non-Baker read barrier we have a temp-clobbering call.
5064       }
5065     }
5066   }
5067 }
5068 
5069 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5070 // move.
5071 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
5072   Register out = OutputRegister(load);
5073   Location out_loc = load->GetLocations()->Out();
5074 
5075   switch (load->GetLoadKind()) {
5076     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5077       __ Ldr(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
5078                                                               load->GetStringIndex()));
5079       return;  // No dex cache slow path.
5080     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5081       // Add ADRP with its PC-relative String patch.
5082       const DexFile& dex_file = load->GetDexFile();
5083       const dex::StringIndex string_index = load->GetStringIndex();
5084       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
5085       vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
5086       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5087       // Add ADD with its PC-relative String patch.
5088       vixl::aarch64::Label* add_label =
5089           codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
5090       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
5091       return;  // No dex cache slow path.
5092     }
5093     case HLoadString::LoadKind::kBootImageAddress: {
5094       uint32_t address = dchecked_integral_cast<uint32_t>(
5095           reinterpret_cast<uintptr_t>(load->GetString().Get()));
5096       DCHECK_NE(address, 0u);
5097       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
5098       return;  // No dex cache slow path.
5099     }
5100     case HLoadString::LoadKind::kBssEntry: {
5101       // Add ADRP with its PC-relative String .bss entry patch.
5102       const DexFile& dex_file = load->GetDexFile();
5103       const dex::StringIndex string_index = load->GetStringIndex();
5104       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5105       Register temp = XRegisterFrom(load->GetLocations()->GetTemp(0));
5106       vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
5107       codegen_->EmitAdrpPlaceholder(adrp_label, temp);
5108       // Add LDR with its PC-relative String patch.
5109       vixl::aarch64::Label* ldr_label =
5110           codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
5111       // /* GcRoot<mirror::String> */ out = *(base_address + offset)  /* PC-relative */
5112       GenerateGcRootFieldLoad(load,
5113                               out_loc,
5114                               temp,
5115                               /* offset placeholder */ 0u,
5116                               ldr_label,
5117                               kCompilerReadBarrierOption);
5118       SlowPathCodeARM64* slow_path =
5119           new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load, temp, adrp_label);
5120       codegen_->AddSlowPath(slow_path);
5121       __ Cbz(out.X(), slow_path->GetEntryLabel());
5122       __ Bind(slow_path->GetExitLabel());
5123       return;
5124     }
5125     case HLoadString::LoadKind::kJitTableAddress: {
5126       __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
5127                                                         load->GetStringIndex(),
5128                                                         load->GetString()));
5129       GenerateGcRootFieldLoad(load,
5130                               out_loc,
5131                               out.X(),
5132                               /* offset */ 0,
5133                               /* fixup_label */ nullptr,
5134                               kCompilerReadBarrierOption);
5135       return;
5136     }
5137     default:
5138       break;
5139   }
5140 
5141   // TODO: Re-add the compiler code to do string dex cache lookup again.
5142   InvokeRuntimeCallingConvention calling_convention;
5143   DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
5144   __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
5145   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
5146   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
5147 }
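// Note on the kBssEntry path above: `out` stays null until the string has been resolved,
// so the Cbz falls into LoadStringSlowPathARM64, which calls pResolveString and stores the
// result back into the .bss slot; later executions then take the fast path. The
// kDexCacheViaMethod fallback at the end of the function always calls pResolveString.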
5148 
5149 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
5150   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
5151   locations->SetOut(Location::ConstantLocation(constant));
5152 }
5153 
5154 void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
5155   // Will be generated at use site.
5156 }
5157 
5158 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5159   LocationSummary* locations =
5160       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
5161   InvokeRuntimeCallingConvention calling_convention;
5162   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5163 }
5164 
5165 void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5166   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
5167                           instruction,
5168                           instruction->GetDexPc());
5169   if (instruction->IsEnter()) {
5170     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
5171   } else {
5172     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
5173   }
5174 }
5175 
5176 void LocationsBuilderARM64::VisitMul(HMul* mul) {
5177   LocationSummary* locations =
5178       new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
5179   switch (mul->GetResultType()) {
5180     case Primitive::kPrimInt:
5181     case Primitive::kPrimLong:
5182       locations->SetInAt(0, Location::RequiresRegister());
5183       locations->SetInAt(1, Location::RequiresRegister());
5184       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5185       break;
5186 
5187     case Primitive::kPrimFloat:
5188     case Primitive::kPrimDouble:
5189       locations->SetInAt(0, Location::RequiresFpuRegister());
5190       locations->SetInAt(1, Location::RequiresFpuRegister());
5191       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5192       break;
5193 
5194     default:
5195       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5196   }
5197 }
5198 
5199 void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) {
5200   switch (mul->GetResultType()) {
5201     case Primitive::kPrimInt:
5202     case Primitive::kPrimLong:
5203       __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
5204       break;
5205 
5206     case Primitive::kPrimFloat:
5207     case Primitive::kPrimDouble:
5208       __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1));
5209       break;
5210 
5211     default:
5212       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5213   }
5214 }
5215 
5216 void LocationsBuilderARM64::VisitNeg(HNeg* neg) {
5217   LocationSummary* locations =
5218       new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
5219   switch (neg->GetResultType()) {
5220     case Primitive::kPrimInt:
5221     case Primitive::kPrimLong:
5222       locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg));
5223       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5224       break;
5225 
5226     case Primitive::kPrimFloat:
5227     case Primitive::kPrimDouble:
5228       locations->SetInAt(0, Location::RequiresFpuRegister());
5229       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5230       break;
5231 
5232     default:
5233       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5234   }
5235 }
5236 
5237 void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) {
5238   switch (neg->GetResultType()) {
5239     case Primitive::kPrimInt:
5240     case Primitive::kPrimLong:
5241       __ Neg(OutputRegister(neg), InputOperandAt(neg, 0));
5242       break;
5243 
5244     case Primitive::kPrimFloat:
5245     case Primitive::kPrimDouble:
5246       __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0));
5247       break;
5248 
5249     default:
5250       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5251   }
5252 }
5253 
5254 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
5255   LocationSummary* locations =
5256       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
5257   InvokeRuntimeCallingConvention calling_convention;
5258   locations->SetOut(LocationFrom(x0));
5259   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5260   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5261 }
5262 
5263 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
5264   // Note: if heap poisoning is enabled, the entry point takes care
5265   // of poisoning the reference.
5266   QuickEntrypointEnum entrypoint =
5267       CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
5268   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5269   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5270 }
5271 
5272 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
5273   LocationSummary* locations =
5274       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
5275   InvokeRuntimeCallingConvention calling_convention;
5276   if (instruction->IsStringAlloc()) {
5277     locations->AddTemp(LocationFrom(kArtMethodRegister));
5278   } else {
5279     locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5280   }
5281   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
5282 }
5283 
5284 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
5285   // Note: if heap poisoning is enabled, the entry point takes care
5286   // of poisoning the reference.
5287   if (instruction->IsStringAlloc()) {
5288     // String is allocated through StringFactory. Call NewEmptyString entry point.
5289     Location temp = instruction->GetLocations()->GetTemp(0);
5290     MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
5291     __ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString)));
5292     __ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value()));
5293 
5294     {
5295       // Ensure the pc position is recorded immediately after the `blr` instruction.
5296       ExactAssemblyScope eas(GetVIXLAssembler(),
5297                              kInstructionSize,
5298                              CodeBufferCheckScope::kExactSize);
5299       __ blr(lr);
5300       codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
5301     }
5302   } else {
5303     codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5304     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5305   }
5306 }
5307 
5308 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
5309   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5310   locations->SetInAt(0, Location::RequiresRegister());
5311   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5312 }
5313 
5314 void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
5315   switch (instruction->GetResultType()) {
5316     case Primitive::kPrimInt:
5317     case Primitive::kPrimLong:
5318       __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0));
5319       break;
5320 
5321     default:
5322       LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
5323   }
5324 }
5325 
5326 void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
5327   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5328   locations->SetInAt(0, Location::RequiresRegister());
5329   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5330 }
5331 
5332 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
5333   __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
5334 }
5335 
5336 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
5337   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5338   locations->SetInAt(0, Location::RequiresRegister());
5339 }
5340 
5341 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5342   if (CanMoveNullCheckToUser(instruction)) {
5343     return;
5344   }
5345   {
5346     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
5347     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5348     Location obj = instruction->GetLocations()->InAt(0);
5349     __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
5350     RecordPcInfo(instruction, instruction->GetDexPc());
5351   }
5352 }
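// The implicit check works by loading from the object into wzr: if `obj` is null the load
// faults, and the pc recorded above lets the runtime's fault handler turn the signal into
// a NullPointerException at the correct dex pc (a sketch of the mechanism; the handler
// itself lives elsewhere in the runtime).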
5353 
5354 void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5355   SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM64(instruction);
5356   AddSlowPath(slow_path);
5357 
5358   LocationSummary* locations = instruction->GetLocations();
5359   Location obj = locations->InAt(0);
5360 
5361   __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
5362 }
5363 
5364 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
5365   codegen_->GenerateNullCheck(instruction);
5366 }
5367 
5368 void LocationsBuilderARM64::VisitOr(HOr* instruction) {
5369   HandleBinaryOp(instruction);
5370 }
5371 
5372 void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) {
5373   HandleBinaryOp(instruction);
5374 }
5375 
5376 void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5377   LOG(FATAL) << "Unreachable";
5378 }
5379 
5380 void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) {
5381   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5382 }
5383 
5384 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
5385   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5386   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5387   if (location.IsStackSlot()) {
5388     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5389   } else if (location.IsDoubleStackSlot()) {
5390     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5391   }
5392   locations->SetOut(location);
5393 }
5394 
5395 void InstructionCodeGeneratorARM64::VisitParameterValue(
5396     HParameterValue* instruction ATTRIBUTE_UNUSED) {
5397   // Nothing to do, the parameter is already at its location.
5398 }
5399 
5400 void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) {
5401   LocationSummary* locations =
5402       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
5403   locations->SetOut(LocationFrom(kArtMethodRegister));
5404 }
5405 
5406 void InstructionCodeGeneratorARM64::VisitCurrentMethod(
5407     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
5408   // Nothing to do, the method is already at its location.
5409 }
5410 
5411 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
5412   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5413   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5414     locations->SetInAt(i, Location::Any());
5415   }
5416   locations->SetOut(Location::Any());
5417 }
5418 
5419 void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
5420   LOG(FATAL) << "Unreachable";
5421 }
5422 
5423 void LocationsBuilderARM64::VisitRem(HRem* rem) {
5424   Primitive::Type type = rem->GetResultType();
5425   LocationSummary::CallKind call_kind =
5426       Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
5427                                            : LocationSummary::kNoCall;
5428   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
5429 
5430   switch (type) {
5431     case Primitive::kPrimInt:
5432     case Primitive::kPrimLong:
5433       locations->SetInAt(0, Location::RequiresRegister());
5434       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
5435       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5436       break;
5437 
5438     case Primitive::kPrimFloat:
5439     case Primitive::kPrimDouble: {
5440       InvokeRuntimeCallingConvention calling_convention;
5441       locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
5442       locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
5443       locations->SetOut(calling_convention.GetReturnLocation(type));
5444 
5445       break;
5446     }
5447 
5448     default:
5449       LOG(FATAL) << "Unexpected rem type " << type;
5450   }
5451 }
5452 
5453 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
5454   Primitive::Type type = rem->GetResultType();
5455 
5456   switch (type) {
5457     case Primitive::kPrimInt:
5458     case Primitive::kPrimLong: {
5459       GenerateDivRemIntegral(rem);
5460       break;
5461     }
5462 
5463     case Primitive::kPrimFloat:
5464     case Primitive::kPrimDouble: {
5465       QuickEntrypointEnum entrypoint = (type == Primitive::kPrimFloat) ? kQuickFmodf : kQuickFmod;
5466       codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc());
5467       if (type == Primitive::kPrimFloat) {
5468         CheckEntrypointTypes<kQuickFmodf, float, float, float>();
5469       } else {
5470         CheckEntrypointTypes<kQuickFmod, double, double, double>();
5471       }
5472       break;
5473     }
5474 
5475     default:
5476       LOG(FATAL) << "Unexpected rem type " << type;
5477       UNREACHABLE();
5478   }
5479 }
5480 
5481 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
5482   memory_barrier->SetLocations(nullptr);
5483 }
5484 
5485 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
5486   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
5487 }
5488 
5489 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
5490   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5491   Primitive::Type return_type = instruction->InputAt(0)->GetType();
5492   locations->SetInAt(0, ARM64ReturnLocation(return_type));
5493 }
5494 
5495 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction ATTRIBUTE_UNUSED) {
5496   codegen_->GenerateFrameExit();
5497 }
5498 
5499 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
5500   instruction->SetLocations(nullptr);
5501 }
5502 
5503 void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) {
5504   codegen_->GenerateFrameExit();
5505 }
5506 
5507 void LocationsBuilderARM64::VisitRor(HRor* ror) {
5508   HandleBinaryOp(ror);
5509 }
5510 
5511 void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
5512   HandleBinaryOp(ror);
5513 }
5514 
5515 void LocationsBuilderARM64::VisitShl(HShl* shl) {
5516   HandleShift(shl);
5517 }
5518 
5519 void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) {
5520   HandleShift(shl);
5521 }
5522 
5523 void LocationsBuilderARM64::VisitShr(HShr* shr) {
5524   HandleShift(shr);
5525 }
5526 
5527 void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) {
5528   HandleShift(shr);
5529 }
5530 
5531 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
5532   HandleBinaryOp(instruction);
5533 }
5534 
5535 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
5536   HandleBinaryOp(instruction);
5537 }
5538 
5539 void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5540   HandleFieldGet(instruction, instruction->GetFieldInfo());
5541 }
5542 
5543 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5544   HandleFieldGet(instruction, instruction->GetFieldInfo());
5545 }
5546 
5547 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5548   HandleFieldSet(instruction);
5549 }
5550 
5551 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5552   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5553 }
5554 
5555 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet(
5556     HUnresolvedInstanceFieldGet* instruction) {
5557   FieldAccessCallingConventionARM64 calling_convention;
5558   codegen_->CreateUnresolvedFieldLocationSummary(
5559       instruction, instruction->GetFieldType(), calling_convention);
5560 }
5561 
5562 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet(
5563     HUnresolvedInstanceFieldGet* instruction) {
5564   FieldAccessCallingConventionARM64 calling_convention;
5565   codegen_->GenerateUnresolvedFieldAccess(instruction,
5566                                           instruction->GetFieldType(),
5567                                           instruction->GetFieldIndex(),
5568                                           instruction->GetDexPc(),
5569                                           calling_convention);
5570 }
5571 
5572 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet(
5573     HUnresolvedInstanceFieldSet* instruction) {
5574   FieldAccessCallingConventionARM64 calling_convention;
5575   codegen_->CreateUnresolvedFieldLocationSummary(
5576       instruction, instruction->GetFieldType(), calling_convention);
5577 }
5578 
5579 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet(
5580     HUnresolvedInstanceFieldSet* instruction) {
5581   FieldAccessCallingConventionARM64 calling_convention;
5582   codegen_->GenerateUnresolvedFieldAccess(instruction,
5583                                           instruction->GetFieldType(),
5584                                           instruction->GetFieldIndex(),
5585                                           instruction->GetDexPc(),
5586                                           calling_convention);
5587 }
5588 
5589 void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet(
5590     HUnresolvedStaticFieldGet* instruction) {
5591   FieldAccessCallingConventionARM64 calling_convention;
5592   codegen_->CreateUnresolvedFieldLocationSummary(
5593       instruction, instruction->GetFieldType(), calling_convention);
5594 }
5595 
5596 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet(
5597     HUnresolvedStaticFieldGet* instruction) {
5598   FieldAccessCallingConventionARM64 calling_convention;
5599   codegen_->GenerateUnresolvedFieldAccess(instruction,
5600                                           instruction->GetFieldType(),
5601                                           instruction->GetFieldIndex(),
5602                                           instruction->GetDexPc(),
5603                                           calling_convention);
5604 }
5605 
5606 void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet(
5607     HUnresolvedStaticFieldSet* instruction) {
5608   FieldAccessCallingConventionARM64 calling_convention;
5609   codegen_->CreateUnresolvedFieldLocationSummary(
5610       instruction, instruction->GetFieldType(), calling_convention);
5611 }
5612 
5613 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
5614     HUnresolvedStaticFieldSet* instruction) {
5615   FieldAccessCallingConventionARM64 calling_convention;
5616   codegen_->GenerateUnresolvedFieldAccess(instruction,
5617                                           instruction->GetFieldType(),
5618                                           instruction->GetFieldIndex(),
5619                                           instruction->GetDexPc(),
5620                                           calling_convention);
5621 }
5622 
5623 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
5624   LocationSummary* locations =
5625       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
5626   // In suspend check slow path, usually there are no caller-save registers at all.
5627   // If SIMD instructions are present, however, we force spilling all live SIMD
5628   // registers in full width (since the runtime only saves/restores lower part).
5629   locations->SetCustomSlowPathCallerSaves(
5630       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5631 }
5632 
5633 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
5634   HBasicBlock* block = instruction->GetBlock();
5635   if (block->GetLoopInformation() != nullptr) {
5636     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5637     // The back edge will generate the suspend check.
5638     return;
5639   }
5640   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5641     // The goto will generate the suspend check.
5642     return;
5643   }
5644   GenerateSuspendCheck(instruction, nullptr);
5645 }
5646 
5647 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
5648   LocationSummary* locations =
5649       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
5650   InvokeRuntimeCallingConvention calling_convention;
5651   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5652 }
5653 
5654 void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
5655   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
5656   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
5657 }
5658 
5659 void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) {
5660   LocationSummary* locations =
5661       new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
5662   Primitive::Type input_type = conversion->GetInputType();
5663   Primitive::Type result_type = conversion->GetResultType();
5664   DCHECK_NE(input_type, result_type);
5665   if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) ||
5666       (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) {
5667     LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
5668   }
5669 
5670   if (Primitive::IsFloatingPointType(input_type)) {
5671     locations->SetInAt(0, Location::RequiresFpuRegister());
5672   } else {
5673     locations->SetInAt(0, Location::RequiresRegister());
5674   }
5675 
5676   if (Primitive::IsFloatingPointType(result_type)) {
5677     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5678   } else {
5679     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5680   }
5681 }
5682 
5683 void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) {
5684   Primitive::Type result_type = conversion->GetResultType();
5685   Primitive::Type input_type = conversion->GetInputType();
5686 
5687   DCHECK_NE(input_type, result_type);
5688 
5689   if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
5690     int result_size = Primitive::ComponentSize(result_type);
5691     int input_size = Primitive::ComponentSize(input_type);
5692     int min_size = std::min(result_size, input_size);
5693     Register output = OutputRegister(conversion);
5694     Register source = InputRegisterAt(conversion, 0);
5695     if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
5696       // 'int' values are used directly as W registers, discarding the top
5697       // bits, so we don't need to sign-extend and can just perform a move.
5698       // We do not pass the `kDiscardForSameWReg` argument to force clearing the
5699       // top 32 bits of the target register. We theoretically could leave those
5700       // bits unchanged, but we would have to make sure that no code uses a
5701       // 32bit input value as a 64bit value assuming that the top 32 bits are
5702       // zero.
5703       __ Mov(output.W(), source.W());
5704     } else if (result_type == Primitive::kPrimChar ||
5705                (input_type == Primitive::kPrimChar && input_size < result_size)) {
5706       __ Ubfx(output,
5707               output.IsX() ? source.X() : source.W(),
5708               0, Primitive::ComponentSize(Primitive::kPrimChar) * kBitsPerByte);
5709     } else {
5710       __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
5711     }
5712   } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
5713     __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
5714   } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
5715     CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
5716     __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
5717   } else if (Primitive::IsFloatingPointType(result_type) &&
5718              Primitive::IsFloatingPointType(input_type)) {
5719     __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
5720   } else {
5721     LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
5722                 << " to " << result_type;
5723   }
5724 }
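// As a rough illustration (registers here are arbitrary and chosen by the
// register allocator), the cases above typically lower to:
//   long   -> int:    mov    w0, w1              // plain 32-bit move, truncation
//   int    -> char:   ubfx   w0, w1, #0, #16     // zero-extend the low 16 bits
//   int    -> byte:   sbfx   w0, w1, #0, #8      // sign-extend the low 8 bits
//   int    -> float:  scvtf  s0, w1
//   float  -> long:   fcvtzs x0, s0
//   double -> float:  fcvt   s0, d0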
5725 
5726 void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
5727   HandleShift(ushr);
5728 }
5729 
5730 void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
5731   HandleShift(ushr);
5732 }
5733 
5734 void LocationsBuilderARM64::VisitXor(HXor* instruction) {
5735   HandleBinaryOp(instruction);
5736 }
5737 
5738 void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
5739   HandleBinaryOp(instruction);
5740 }
5741 
5742 void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
5743   // Nothing to do, this should be removed during prepare for register allocator.
5744   LOG(FATAL) << "Unreachable";
5745 }
5746 
5747 void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
5748   // Nothing to do, this should be removed during prepare for register allocator.
5749   LOG(FATAL) << "Unreachable";
5750 }
5751 
5752 // Simple implementation of packed switch - generate cascaded compare/jumps.
5753 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
5754   LocationSummary* locations =
5755       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
5756   locations->SetInAt(0, Location::RequiresRegister());
5757 }
5758 
5759 void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
5760   int32_t lower_bound = switch_instr->GetStartValue();
5761   uint32_t num_entries = switch_instr->GetNumEntries();
5762   Register value_reg = InputRegisterAt(switch_instr, 0);
5763   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
5764 
5765   // Roughly set 16 as the maximum average number of instructions generated per HIR in a graph.
5766   static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
5767   // ADR has a limited range (+/-1MB), so we set a threshold on the number of HIRs in the graph to
5768   // make sure we don't emit a jump table if the target may be out of range.
5769   // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
5770   // ranges and emit the tables only as required.
5771   static constexpr int32_t kJumpTableInstructionThreshold = 1 * MB / kMaxExpectedSizePerHInstruction;
5772 
5773   if (num_entries <= kPackedSwitchCompareJumpThreshold ||
5774       // The current instruction id is an upper bound on the number of HIRs in the graph.
5775       GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
5776     // Create a series of compare/jumps.
5777     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
5778     Register temp = temps.AcquireW();
5779     __ Subs(temp, value_reg, Operand(lower_bound));
5780 
5781     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
5782     // Jump to successors[0] if value == lower_bound.
5783     __ B(eq, codegen_->GetLabelOf(successors[0]));
5784     int32_t last_index = 0;
5785     for (; num_entries - last_index > 2; last_index += 2) {
5786       __ Subs(temp, temp, Operand(2));
5787       // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
5788       __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
5789       // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
5790       __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
5791     }
5792     if (num_entries - last_index == 2) {
5793       // The last missing case_value.
5794       __ Cmp(temp, Operand(1));
5795       __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
5796     }
5797 
5798     // And the default for any other value.
5799     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
5800       __ B(codegen_->GetLabelOf(default_block));
5801     }
5802   } else {
5803     JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr);
5804 
5805     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
5806 
5807     // The instructions below should use at most one blocked register. Since there are two blocked
5808     // registers, we are free to block one.
5809     Register temp_w = temps.AcquireW();
5810     Register index;
5811     // Remove the bias.
5812     if (lower_bound != 0) {
5813       index = temp_w;
5814       __ Sub(index, value_reg, Operand(lower_bound));
5815     } else {
5816       index = value_reg;
5817     }
5818 
5819     // Jump to the default block if the index is out of range.
5820     __ Cmp(index, Operand(num_entries));
5821     __ B(hs, codegen_->GetLabelOf(default_block));
5822 
5823     // The current VIXL implementation does not require any blocked registers to encode the
5824     // immediate value for Adr, so we are free to use both VIXL blocked registers to reduce
5825     // register pressure.
5826     Register table_base = temps.AcquireX();
5827     // Load jump offset from the table.
5828     __ Adr(table_base, jump_table->GetTableStartLabel());
5829     Register jump_offset = temp_w;
5830     __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
5831 
5832     // Jump to the target block by branching to table_base (PC-relative) + offset.
5833     Register target_address = table_base;
5834     __ Add(target_address, table_base, Operand(jump_offset, SXTW));
5835     __ Br(target_address);
5836   }
5837 }
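// Sketch of the two strategies above (the scratch registers w16/x17 shown here
// are illustrative only). A small switch over {0, 1, 2} emits a compare/jump
// cascade roughly like:
//   subs w16, w_value, #lower_bound
//   b.eq case_0
//   subs w16, w16, #2
//   b.lo case_1
//   b.eq case_2
//   b    default
// while a large switch emits a bounds check and an ADR-based jump table
// lookup, roughly:
//   cmp  w_index, #num_entries
//   b.hs default
//   adr  x17, jump_table
//   ldr  w16, [x17, w_index, uxtw #2]
//   add  x17, x17, w16, sxtw
//   br   x17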
5838 
5839 void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
5840     HInstruction* instruction,
5841     Location out,
5842     uint32_t offset,
5843     Location maybe_temp,
5844     ReadBarrierOption read_barrier_option) {
5845   Primitive::Type type = Primitive::kPrimNot;
5846   Register out_reg = RegisterFrom(out, type);
5847   if (read_barrier_option == kWithReadBarrier) {
5848     CHECK(kEmitCompilerReadBarrier);
5849     if (kUseBakerReadBarrier) {
5850       // Load with fast path based Baker's read barrier.
5851       // /* HeapReference<Object> */ out = *(out + offset)
5852       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
5853                                                       out,
5854                                                       out_reg,
5855                                                       offset,
5856                                                       maybe_temp,
5857                                                       /* needs_null_check */ false,
5858                                                       /* use_load_acquire */ false);
5859     } else {
5860       // Load with slow path based read barrier.
5861       // Save the value of `out` into `maybe_temp` before overwriting it
5862       // in the following move operation, as we will need it for the
5863       // read barrier below.
5864       Register temp_reg = RegisterFrom(maybe_temp, type);
5865       __ Mov(temp_reg, out_reg);
5866       // /* HeapReference<Object> */ out = *(out + offset)
5867       __ Ldr(out_reg, HeapOperand(out_reg, offset));
5868       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
5869     }
5870   } else {
5871     // Plain load with no read barrier.
5872     // /* HeapReference<Object> */ out = *(out + offset)
5873     __ Ldr(out_reg, HeapOperand(out_reg, offset));
5874     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
5875   }
5876 }
5877 
5878 void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
5879     HInstruction* instruction,
5880     Location out,
5881     Location obj,
5882     uint32_t offset,
5883     Location maybe_temp,
5884     ReadBarrierOption read_barrier_option) {
5885   Primitive::Type type = Primitive::kPrimNot;
5886   Register out_reg = RegisterFrom(out, type);
5887   Register obj_reg = RegisterFrom(obj, type);
5888   if (read_barrier_option == kWithReadBarrier) {
5889     CHECK(kEmitCompilerReadBarrier);
5890     if (kUseBakerReadBarrier) {
5891       // Load with fast path based Baker's read barrier.
5892       // /* HeapReference<Object> */ out = *(obj + offset)
5893       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
5894                                                       out,
5895                                                       obj_reg,
5896                                                       offset,
5897                                                       maybe_temp,
5898                                                       /* needs_null_check */ false,
5899                                                       /* use_load_acquire */ false);
5900     } else {
5901       // Load with slow path based read barrier.
5902       // /* HeapReference<Object> */ out = *(obj + offset)
5903       __ Ldr(out_reg, HeapOperand(obj_reg, offset));
5904       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
5905     }
5906   } else {
5907     // Plain load with no read barrier.
5908     // /* HeapReference<Object> */ out = *(obj + offset)
5909     __ Ldr(out_reg, HeapOperand(obj_reg, offset));
5910     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
5911   }
5912 }
5913 
5914 void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
5915     HInstruction* instruction,
5916     Location root,
5917     Register obj,
5918     uint32_t offset,
5919     vixl::aarch64::Label* fixup_label,
5920     ReadBarrierOption read_barrier_option) {
5921   DCHECK(fixup_label == nullptr || offset == 0u);
5922   Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
5923   if (read_barrier_option == kWithReadBarrier) {
5924     DCHECK(kEmitCompilerReadBarrier);
5925     if (kUseBakerReadBarrier) {
5926       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
5927       // Baker's read barriers are used.
5928       if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
5929           !Runtime::Current()->UseJitCompilation()) {
5930         // Note that we do not actually check the value of `GetIsGcMarking()`
5931         // to decide whether to mark the loaded GC root or not.  Instead, we
5932         // load into `temp` the read barrier mark introspection entrypoint.
5933         // If `temp` is null, it means that `GetIsGcMarking()` is false, and
5934         // vice versa.
5935         //
5936         // We use link-time generated thunks for the slow path. That thunk
5937         // checks the reference and jumps to the entrypoint if needed.
5938         //
5939         //     temp = Thread::Current()->pReadBarrierMarkIntrospection
5940         //     lr = &return_address;
5941         //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
5942         //     if (temp != nullptr) {
5943         //        goto gc_root_thunk<root_reg>(lr)
5944         //     }
5945         //   return_address:
5946 
5947         UseScratchRegisterScope temps(GetVIXLAssembler());
5948         DCHECK(temps.IsAvailable(ip0));
5949         DCHECK(temps.IsAvailable(ip1));
5950         temps.Exclude(ip0, ip1);
5951         uint32_t custom_data =
5952             linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
5953         vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data);
5954 
5955         // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
5956         DCHECK_EQ(ip0.GetCode(), 16u);
5957         const int32_t entry_point_offset =
5958             CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
5959         __ Ldr(ip1, MemOperand(tr, entry_point_offset));
5960         EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
5961         vixl::aarch64::Label return_address;
5962         __ adr(lr, &return_address);
5963         if (fixup_label != nullptr) {
5964           __ Bind(fixup_label);
5965         }
5966         static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
5967                       "GC root LDR must be 2 instructions (8B) before the return address label.");
5968         __ ldr(root_reg, MemOperand(obj.X(), offset));
5969         __ Bind(cbnz_label);
5970         __ cbnz(ip1, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
5971         __ Bind(&return_address);
5972       } else {
5973         // Note that we do not actually check the value of
5974         // `GetIsGcMarking()` to decide whether to mark the loaded GC
5975         // root or not.  Instead, we load into `temp` the read barrier
5976         // mark entry point corresponding to register `root`. If `temp`
5977         // is null, it means that `GetIsGcMarking()` is false, and vice
5978         // versa.
5979         //
5980         //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
5981         //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
5982         //   if (temp != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
5983         //     // Slow path.
5984         //     root = temp(root);  // root = ReadBarrier::Mark(root);  // Runtime entry point call.
5985         //   }
5986 
5987         // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
5988         Register temp = lr;
5989         SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(
5990             instruction, root, /* entrypoint */ LocationFrom(temp));
5991         codegen_->AddSlowPath(slow_path);
5992 
5993         // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
5994         const int32_t entry_point_offset =
5995             CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg());
5996         // Loading the entrypoint does not require a load acquire since it is only changed when
5997         // threads are suspended or running a checkpoint.
5998         __ Ldr(temp, MemOperand(tr, entry_point_offset));
5999 
6000         // /* GcRoot<mirror::Object> */ root = *(obj + offset)
6001         if (fixup_label == nullptr) {
6002           __ Ldr(root_reg, MemOperand(obj, offset));
6003         } else {
6004           codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
6005         }
6006         static_assert(
6007             sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
6008             "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
6009             "have different sizes.");
6010         static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
6011                       "art::mirror::CompressedReference<mirror::Object> and int32_t "
6012                       "have different sizes.");
6013 
6014         // The entrypoint is null when the GC is not marking; this saves a load compared to
6015         // checking GetIsGcMarking().
6016         __ Cbnz(temp, slow_path->GetEntryLabel());
6017         __ Bind(slow_path->GetExitLabel());
6018       }
6019     } else {
6020       // GC root loaded through a slow path for read barriers other
6021       // than Baker's.
6022       // /* GcRoot<mirror::Object>* */ root = obj + offset
6023       if (fixup_label == nullptr) {
6024         __ Add(root_reg.X(), obj.X(), offset);
6025       } else {
6026         codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
6027       }
6028       // /* mirror::Object* */ root = root->Read()
6029       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
6030     }
6031   } else {
6032     // Plain GC root load with no read barrier.
6033     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
6034     if (fixup_label == nullptr) {
6035       __ Ldr(root_reg, MemOperand(obj, offset));
6036     } else {
6037       codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
6038     }
6039     // Note that GC roots are not affected by heap poisoning, thus we
6040     // do not have to unpoison `root_reg` here.
6041   }
6042 }
6043 
6044 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6045                                                                Location ref,
6046                                                                Register obj,
6047                                                                uint32_t offset,
6048                                                                Location maybe_temp,
6049                                                                bool needs_null_check,
6050                                                                bool use_load_acquire) {
6051   DCHECK(kEmitCompilerReadBarrier);
6052   DCHECK(kUseBakerReadBarrier);
6053 
6054   if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
6055       !use_load_acquire &&
6056       !Runtime::Current()->UseJitCompilation()) {
6057     // Note that we do not actually check the value of `GetIsGcMarking()`
6058     // to decide whether to mark the loaded GC root or not.  Instead, we
6059     // load into `temp` the read barrier mark introspection entrypoint.
6060     // If `temp` is null, it means that `GetIsGcMarking()` is false, and
6061     // vice versa.
6062     //
6063     // We use link-time generated thunks for the slow path. That thunk checks
6064     // the holder and jumps to the entrypoint if needed. If the holder is not
6065     // gray, it creates a fake dependency and returns to the LDR instruction.
6066     //
6067     //     temp = Thread::Current()->pReadBarrierMarkIntrospection
6068     //     lr = &return_address;
6069     //     if (temp != nullptr) {
6070     //        goto field_thunk<holder_reg, base_reg>(lr)
6071     //     }
6072     //   not_gray_return_address:
6073     //     // Original reference load. If the offset is too large to fit
6074     //     // into LDR, we use an adjusted base register here.
6075     //     GcRoot<mirror::Object> root = *(obj+offset);
6076     //   gray_return_address:
6077 
6078     DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
6079     Register base = obj;
6080     if (offset >= kReferenceLoadMinFarOffset) {
6081       DCHECK(maybe_temp.IsRegister());
6082       base = WRegisterFrom(maybe_temp);
6083       static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
6084       __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
6085       offset &= (kReferenceLoadMinFarOffset - 1u);
6086     }
6087     UseScratchRegisterScope temps(GetVIXLAssembler());
6088     DCHECK(temps.IsAvailable(ip0));
6089     DCHECK(temps.IsAvailable(ip1));
6090     temps.Exclude(ip0, ip1);
6091     uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(
6092         base.GetCode(),
6093         obj.GetCode());
6094     vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
6095 
6096     // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
6097     DCHECK_EQ(ip0.GetCode(), 16u);
6098     const int32_t entry_point_offset =
6099         CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
6100     __ Ldr(ip1, MemOperand(tr, entry_point_offset));
6101     EmissionCheckScope guard(GetVIXLAssembler(),
6102                              (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
6103     vixl::aarch64::Label return_address;
6104     __ adr(lr, &return_address);
6105     __ Bind(cbnz_label);
6106     __ cbnz(ip1, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
6107     static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
6108                   "Field LDR must be 1 instruction (4B) before the return address label; "
6109                   "2 instructions (8B) for heap poisoning.");
6110     Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
6111     __ ldr(ref_reg, MemOperand(base.X(), offset));
6112     if (needs_null_check) {
6113       MaybeRecordImplicitNullCheck(instruction);
6114     }
6115     GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
6116     __ Bind(&return_address);
6117     return;
6118   }
6119 
6120   // /* HeapReference<Object> */ ref = *(obj + offset)
6121   Register temp = WRegisterFrom(maybe_temp);
6122   Location no_index = Location::NoLocation();
6123   size_t no_scale_factor = 0u;
6124   GenerateReferenceLoadWithBakerReadBarrier(instruction,
6125                                             ref,
6126                                             obj,
6127                                             offset,
6128                                             no_index,
6129                                             no_scale_factor,
6130                                             temp,
6131                                             needs_null_check,
6132                                             use_load_acquire);
6133 }
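// Illustrative example of the far-offset split above (the offset value is
// assumed, not taken from real code): with kReferenceLoadMinFarOffset == 16KiB,
// a field at offset 0x4128 is loaded as
//   add w_temp, w_obj, #0x4000    // base = obj + (offset & ~0x3fff)
//   ldr w_ref,  [x_temp, #0x128]  // remaining offset fits into the LDR
// so the link-time thunk can still locate the holder from the (base, holder)
// register pair encoded in the patch data.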
6134 
6135 void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
6136                                                                Location ref,
6137                                                                Register obj,
6138                                                                uint32_t data_offset,
6139                                                                Location index,
6140                                                                Register temp,
6141                                                                bool needs_null_check) {
6142   DCHECK(kEmitCompilerReadBarrier);
6143   DCHECK(kUseBakerReadBarrier);
6144 
6145   // Array cells are never volatile variables; therefore, array loads
6146   // never use Load-Acquire instructions on ARM64.
6147   const bool use_load_acquire = false;
6148 
6149   static_assert(
6150       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6151       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6152   // /* HeapReference<Object> */ ref =
6153   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
6154   size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
6155   GenerateReferenceLoadWithBakerReadBarrier(instruction,
6156                                             ref,
6157                                             obj,
6158                                             data_offset,
6159                                             index,
6160                                             scale_factor,
6161                                             temp,
6162                                             needs_null_check,
6163                                             use_load_acquire);
6164 }
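// Worked example for the address computation above: heap references are
// 4 bytes (see the static_assert), so scale_factor == 2 and, e.g., with
// data_offset == 12, element 5 of an object array is loaded from
// obj + 12 + (5 << 2) == obj + 32.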
6165 
6166 void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
6167                                                                    Location ref,
6168                                                                    Register obj,
6169                                                                    uint32_t offset,
6170                                                                    Location index,
6171                                                                    size_t scale_factor,
6172                                                                    Register temp,
6173                                                                    bool needs_null_check,
6174                                                                    bool use_load_acquire,
6175                                                                    bool always_update_field) {
6176   DCHECK(kEmitCompilerReadBarrier);
6177   DCHECK(kUseBakerReadBarrier);
6178   // If we are emitting an array load, we should not be using a
6179   // Load Acquire instruction.  In other words:
6180   // `instruction->IsArrayGet()` => `!use_load_acquire`.
6181   DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
6182 
6183   // Query `art::Thread::Current()->GetIsGcMarking()` to decide
6184   // whether we need to enter the slow path to mark the reference.
6185   // Then, in the slow path, check the gray bit in the lock word of
6186   // the reference's holder (`obj`) to decide whether to mark `ref` or
6187   // not.
6188   //
6189   // Note that we do not actually check the value of `GetIsGcMarking()`;
6190   // instead, we load into `temp2` the read barrier mark entry point
6191   // corresponding to register `ref`. If `temp2` is null, it means
6192   // that `GetIsGcMarking()` is false, and vice versa.
6193   //
6194   //   temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
6195   //   if (temp2 != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
6196   //     // Slow path.
6197   //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
6198   //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
6199   //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
6200   //     bool is_gray = (rb_state == ReadBarrier::GrayState());
6201   //     if (is_gray) {
6202   //       ref = temp2(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
6203   //     }
6204   //   } else {
6205   //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
6206   //   }
6207 
6208   // Slow path marking the object `ref` when the GC is marking. The
6209   // entrypoint will already be loaded in `temp2`.
6210   Register temp2 = lr;
6211   Location temp2_loc = LocationFrom(temp2);
6212   SlowPathCodeARM64* slow_path;
6213   if (always_update_field) {
6214     // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
6215     // only supports address of the form `obj + field_offset`, where
6216     // `obj` is a register and `field_offset` is a register. Thus
6217     // `offset` and `scale_factor` above are expected to be null in
6218     // this code path.
6219     DCHECK_EQ(offset, 0u);
6220     DCHECK_EQ(scale_factor, 0u);  /* "times 1" */
6221     Location field_offset = index;
6222     slow_path =
6223         new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
6224             instruction,
6225             ref,
6226             obj,
6227             offset,
6228             /* index */ field_offset,
6229             scale_factor,
6230             needs_null_check,
6231             use_load_acquire,
6232             temp,
6233             /* entrypoint */ temp2_loc);
6234   } else {
6235     slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
6236         instruction,
6237         ref,
6238         obj,
6239         offset,
6240         index,
6241         scale_factor,
6242         needs_null_check,
6243         use_load_acquire,
6244         temp,
6245         /* entrypoint */ temp2_loc);
6246   }
6247   AddSlowPath(slow_path);
6248 
6249   // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
6250   const int32_t entry_point_offset =
6251       CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg());
6252   // Loading the entrypoint does not require a load acquire since it is only changed when
6253   // threads are suspended or running a checkpoint.
6254   __ Ldr(temp2, MemOperand(tr, entry_point_offset));
6255   // The entrypoint is null when the GC is not marking; this saves a load compared to
6256   // checking GetIsGcMarking().
6257   __ Cbnz(temp2, slow_path->GetEntryLabel());
6258   // Fast path: just load the reference.
6259   GenerateRawReferenceLoad(
6260       instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
6261   __ Bind(slow_path->GetExitLabel());
6262 }
6263 
6264 void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
6265                                                   Location ref,
6266                                                   Register obj,
6267                                                   uint32_t offset,
6268                                                   Location index,
6269                                                   size_t scale_factor,
6270                                                   bool needs_null_check,
6271                                                   bool use_load_acquire) {
6272   DCHECK(obj.IsW());
6273   Primitive::Type type = Primitive::kPrimNot;
6274   Register ref_reg = RegisterFrom(ref, type);
6275 
6276   // If needed, vixl::EmissionCheckScope guards are used to ensure
6277   // that no pools are emitted between the load (macro) instruction
6278   // and MaybeRecordImplicitNullCheck.
6279 
6280   if (index.IsValid()) {
6281     // Load types involving an "index": ArrayGet,
6282     // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
6283     // intrinsics.
6284     if (use_load_acquire) {
6285       // UnsafeGetObjectVolatile intrinsic case.
6286       // Register `index` is not an index in an object array, but an
6287       // offset to an object reference field within object `obj`.
6288       DCHECK(instruction->IsInvoke()) << instruction->DebugName();
6289       DCHECK(instruction->GetLocations()->Intrinsified());
6290       DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)
6291           << instruction->AsInvoke()->GetIntrinsic();
6292       DCHECK_EQ(offset, 0u);
6293       DCHECK_EQ(scale_factor, 0u);
6294       DCHECK_EQ(needs_null_check, false);
6295       // /* HeapReference<mirror::Object> */ ref = *(obj + index)
6296       MemOperand field = HeapOperand(obj, XRegisterFrom(index));
6297       LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
6298     } else {
6299       // ArrayGet and UnsafeGetObject and UnsafeCASObject intrinsics cases.
6300       // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
6301       if (index.IsConstant()) {
6302         uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
6303         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6304         Load(type, ref_reg, HeapOperand(obj, computed_offset));
6305         if (needs_null_check) {
6306           MaybeRecordImplicitNullCheck(instruction);
6307         }
6308       } else {
6309         UseScratchRegisterScope temps(GetVIXLAssembler());
6310         Register temp = temps.AcquireW();
6311         __ Add(temp, obj, offset);
6312         {
6313           EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6314           Load(type, ref_reg, HeapOperand(temp, XRegisterFrom(index), LSL, scale_factor));
6315           if (needs_null_check) {
6316             MaybeRecordImplicitNullCheck(instruction);
6317           }
6318         }
6319       }
6320     }
6321   } else {
6322     // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
6323     MemOperand field = HeapOperand(obj, offset);
6324     if (use_load_acquire) {
6325       // Implicit null checks are handled by CodeGeneratorARM64::LoadAcquire.
6326       LoadAcquire(instruction, ref_reg, field, needs_null_check);
6327     } else {
6328       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6329       Load(type, ref_reg, field);
6330       if (needs_null_check) {
6331         MaybeRecordImplicitNullCheck(instruction);
6332       }
6333     }
6334   }
6335 
6336   // Object* ref = ref_addr->AsMirrorPtr()
6337   GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
6338 }
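// Roughly, the load above takes one of three shapes (registers illustrative):
//   constant index:   ldr  w_ref, [x_obj, #(offset + (index << scale_factor))]
//   register index:   add  w_temp, w_obj, #offset
//                     ldr  w_ref, [x_temp, x_index, lsl #scale_factor]
//   load-acquire:     ldar w_ref, [x_addr]   // address materialized beforehand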
6339 
6340 void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
6341                                                  Location out,
6342                                                  Location ref,
6343                                                  Location obj,
6344                                                  uint32_t offset,
6345                                                  Location index) {
6346   DCHECK(kEmitCompilerReadBarrier);
6347 
6348   // Insert a slow path based read barrier *after* the reference load.
6349   //
6350   // If heap poisoning is enabled, the unpoisoning of the loaded
6351   // reference will be carried out by the runtime within the slow
6352   // path.
6353   //
6354   // Note that `ref` currently does not get unpoisoned (when heap
6355   // poisoning is enabled), which is alright as the `ref` argument is
6356   // not used by the artReadBarrierSlow entry point.
6357   //
6358   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6359   SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
6360       ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
6361   AddSlowPath(slow_path);
6362 
6363   __ B(slow_path->GetEntryLabel());
6364   __ Bind(slow_path->GetExitLabel());
6365 }
6366 
6367 void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6368                                                       Location out,
6369                                                       Location ref,
6370                                                       Location obj,
6371                                                       uint32_t offset,
6372                                                       Location index) {
6373   if (kEmitCompilerReadBarrier) {
6374     // Baker's read barriers shall be handled by the fast path
6375     // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
6376     DCHECK(!kUseBakerReadBarrier);
6377     // If heap poisoning is enabled, unpoisoning will be taken care of
6378     // by the runtime within the slow path.
6379     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6380   } else if (kPoisonHeapReferences) {
6381     GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
6382   }
6383 }
6384 
6385 void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6386                                                         Location out,
6387                                                         Location root) {
6388   DCHECK(kEmitCompilerReadBarrier);
6389 
6390   // Insert a slow path based read barrier *after* the GC root load.
6391   //
6392   // Note that GC roots are not affected by heap poisoning, so we do
6393   // not need to do anything special for this here.
6394   SlowPathCodeARM64* slow_path =
6395       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
6396   AddSlowPath(slow_path);
6397 
6398   __ B(slow_path->GetEntryLabel());
6399   __ Bind(slow_path->GetExitLabel());
6400 }
6401 
6402 void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
6403   LocationSummary* locations =
6404       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
6405   locations->SetInAt(0, Location::RequiresRegister());
6406   locations->SetOut(Location::RequiresRegister());
6407 }
6408 
6409 void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
6410   LocationSummary* locations = instruction->GetLocations();
6411   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
6412     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
6413         instruction->GetIndex(), kArm64PointerSize).SizeValue();
6414     __ Ldr(XRegisterFrom(locations->Out()),
6415            MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
6416   } else {
6417     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
6418         instruction->GetIndex(), kArm64PointerSize));
6419     __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
6420         mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
6421     __ Ldr(XRegisterFrom(locations->Out()),
6422            MemOperand(XRegisterFrom(locations->Out()), method_offset));
6423   }
6424 }
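// In other words (offsets and registers here are illustrative): a vtable
// lookup is a single load from the embedded vtable of the Class, while an IMT
// lookup first loads the ImTable pointer and then indexes into it, roughly:
//   ldr x_out, [x_class, #imt_ptr_offset]
//   ldr x_out, [x_out, #(index * 8)]        // kArm64PointerSize == 8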
6425 
6426 static void PatchJitRootUse(uint8_t* code,
6427                             const uint8_t* roots_data,
6428                             vixl::aarch64::Literal<uint32_t>* literal,
6429                             uint64_t index_in_table) {
6430   uint32_t literal_offset = literal->GetOffset();
6431   uintptr_t address =
6432       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
6433   uint8_t* data = code + literal_offset;
6434   reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
6435 }
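// For illustration (addresses assumed): with roots_data at 0x70001000 and
// index_in_table == 3, the patched address is
// 0x70001000 + 3 * sizeof(GcRoot<mirror::Object>) == 0x7000100c, and that
// 32-bit value overwrites the literal at code + literal->GetOffset().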
6436 
6437 void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
6438   for (const auto& entry : jit_string_patches_) {
6439     const auto& it = jit_string_roots_.find(entry.first);
6440     DCHECK(it != jit_string_roots_.end());
6441     PatchJitRootUse(code, roots_data, entry.second, it->second);
6442   }
6443   for (const auto& entry : jit_class_patches_) {
6444     const auto& it = jit_class_roots_.find(entry.first);
6445     DCHECK(it != jit_class_roots_.end());
6446     PatchJitRootUse(code, roots_data, entry.second, it->second);
6447   }
6448 }
6449 
6450 #undef __
6451 #undef QUICK_ENTRY_POINT
6452 
6453 }  // namespace arm64
6454 }  // namespace art
6455