1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_arm64.h"
18 
19 #include "aarch64/assembler-aarch64.h"
20 #include "aarch64/registers-aarch64.h"
21 #include "arch/arm64/asm_support_arm64.h"
22 #include "arch/arm64/instruction_set_features_arm64.h"
23 #include "arch/arm64/jni_frame_arm64.h"
24 #include "art_method-inl.h"
25 #include "base/bit_utils.h"
26 #include "base/bit_utils_iterator.h"
27 #include "class_root-inl.h"
28 #include "class_table.h"
29 #include "code_generator_utils.h"
30 #include "entrypoints/quick/quick_entrypoints.h"
31 #include "entrypoints/quick/quick_entrypoints_enum.h"
32 #include "gc/accounting/card_table.h"
33 #include "gc/space/image_space.h"
34 #include "heap_poisoning.h"
35 #include "interpreter/mterp/nterp.h"
36 #include "intrinsics.h"
37 #include "intrinsics_arm64.h"
38 #include "intrinsics_utils.h"
39 #include "linker/linker_patch.h"
40 #include "lock_word.h"
41 #include "mirror/array-inl.h"
42 #include "mirror/class-inl.h"
43 #include "mirror/var_handle.h"
44 #include "offsets.h"
45 #include "optimizing/common_arm64.h"
46 #include "optimizing/nodes.h"
47 #include "thread.h"
48 #include "utils/arm64/assembler_arm64.h"
49 #include "utils/assembler.h"
50 #include "utils/stack_checks.h"
51 
52 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
53 using vixl::ExactAssemblyScope;
54 using vixl::CodeBufferCheckScope;
55 using vixl::EmissionCheckScope;
56 
57 #ifdef __
58 #error "ARM64 Codegen VIXL macro-assembler macro already defined."
59 #endif
60 
61 namespace art HIDDEN {
62 
63 template<class MirrorType>
64 class GcRoot;
65 
66 namespace arm64 {
67 
68 using helpers::ARM64EncodableConstantOrRegister;
69 using helpers::ArtVixlRegCodeCoherentForRegSet;
70 using helpers::CPURegisterFrom;
71 using helpers::DRegisterFrom;
72 using helpers::FPRegisterFrom;
73 using helpers::HeapOperand;
74 using helpers::HeapOperandFrom;
75 using helpers::InputCPURegisterOrZeroRegAt;
76 using helpers::InputFPRegisterAt;
77 using helpers::InputOperandAt;
78 using helpers::InputRegisterAt;
79 using helpers::Int64FromLocation;
80 using helpers::LocationFrom;
81 using helpers::OperandFromMemOperand;
82 using helpers::OutputCPURegister;
83 using helpers::OutputFPRegister;
84 using helpers::OutputRegister;
85 using helpers::RegisterFrom;
86 using helpers::StackOperandFrom;
87 using helpers::VIXLRegCodeFromART;
88 using helpers::WRegisterFrom;
89 using helpers::XRegisterFrom;
90 
91 // The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the
92 // jump table version generates 7 instructions and num_entries literals. The compare/jump
93 // sequence generates less code/data for a small num_entries.
94 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
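// (At the threshold the two forms cost roughly the same: ~1.5 * 7 + 3 ≈ 14 instructions for the
// compare/jump sequence vs. 7 instructions plus 7 32-bit literals for the jump table.)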
95 
96 // A reference load (except object array loads) uses LDR Wt, [Xn, #offset], which can handle
97 // offsets < 16KiB. For offsets >= 16KiB, the load must be emitted as two or more instructions.
98 // For the Baker read barrier implementation using link-time generated thunks, we need to split
99 // the offset explicitly.
100 constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
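// (The unsigned 12-bit immediate of LDR Wt, [Xn, #offset] is scaled by 4, so a single load can
// address offsets up to 16380 bytes.)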
101 
102 inline Condition ARM64Condition(IfCondition cond) {
103   switch (cond) {
104     case kCondEQ: return eq;
105     case kCondNE: return ne;
106     case kCondLT: return lt;
107     case kCondLE: return le;
108     case kCondGT: return gt;
109     case kCondGE: return ge;
110     case kCondB:  return lo;
111     case kCondBE: return ls;
112     case kCondA:  return hi;
113     case kCondAE: return hs;
114   }
115   LOG(FATAL) << "Unreachable";
116   UNREACHABLE();
117 }
118 
119 inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
120   // The ARM64 condition codes can express all the necessary branches, see the
121   // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
122   // There is no dex instruction or HIR that would need the missing conditions
123   // "equal or unordered" or "not equal".
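  // For reference: FCMP sets NZCV to 0b0011 for unordered (NaN) operands, so the conditions
  // annotated "unordered" below are exactly the ones that also evaluate to true for NaN inputs.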
124   switch (cond) {
125     case kCondEQ: return eq;
126     case kCondNE: return ne /* unordered */;
127     case kCondLT: return gt_bias ? cc : lt /* unordered */;
128     case kCondLE: return gt_bias ? ls : le /* unordered */;
129     case kCondGT: return gt_bias ? hi /* unordered */ : gt;
130     case kCondGE: return gt_bias ? cs /* unordered */ : ge;
131     default:
132       LOG(FATAL) << "UNREACHABLE";
133       UNREACHABLE();
134   }
135 }
136 
137 Location ARM64ReturnLocation(DataType::Type return_type) {
138   // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
139   // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
140   // but we use the exact registers for clarity.
141   if (return_type == DataType::Type::kFloat32) {
142     return LocationFrom(s0);
143   } else if (return_type == DataType::Type::kFloat64) {
144     return LocationFrom(d0);
145   } else if (return_type == DataType::Type::kInt64) {
146     return LocationFrom(x0);
147   } else if (return_type == DataType::Type::kVoid) {
148     return Location::NoLocation();
149   } else {
150     return LocationFrom(w0);
151   }
152 }
153 
154 Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) {
155   return ARM64ReturnLocation(return_type);
156 }
157 
158 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
159   InvokeRuntimeCallingConvention calling_convention;
160   RegisterSet caller_saves = RegisterSet::Empty();
161   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
162   DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
163             RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference),
164                          DataType::Type::kReference).GetCode());
165   return caller_saves;
166 }
167 
168 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
169 #define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->  // NOLINT
170 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
171 
172 void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
173   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
174   const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
175   for (uint32_t i : LowToHighBits(core_spills)) {
176     // If the register holds an object, update the stack mask.
177     if (locations->RegisterContainsObject(i)) {
178       locations->SetStackBit(stack_offset / kVRegSize);
179     }
180     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
181     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
182     saved_core_stack_offsets_[i] = stack_offset;
183     stack_offset += kXRegSizeInBytes;
184   }
185 
186   const size_t fp_reg_size = codegen->GetSlowPathFPWidth();
187   const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
188   for (uint32_t i : LowToHighBits(fp_spills)) {
189     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
190     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
191     saved_fpu_stack_offsets_[i] = stack_offset;
192     stack_offset += fp_reg_size;
193   }
194 
195   InstructionCodeGeneratorARM64* visitor =
196       down_cast<CodeGeneratorARM64*>(codegen)->GetInstructionCodeGeneratorArm64();
197   visitor->SaveLiveRegistersHelper(locations, codegen->GetFirstRegisterSlotInSlowPath());
198 }
199 
200 void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
201   InstructionCodeGeneratorARM64* visitor =
202       down_cast<CodeGeneratorARM64*>(codegen)->GetInstructionCodeGeneratorArm64();
203   visitor->RestoreLiveRegistersHelper(locations, codegen->GetFirstRegisterSlotInSlowPath());
204 }
205 
206 class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
207  public:
208   explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}
209 
210   void EmitNativeCode(CodeGenerator* codegen) override {
211     LocationSummary* locations = instruction_->GetLocations();
212     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
213 
214     __ Bind(GetEntryLabel());
215     if (instruction_->CanThrowIntoCatchBlock()) {
216       // Live registers will be restored in the catch block if caught.
217       SaveLiveRegisters(codegen, instruction_->GetLocations());
218     }
219     // We're moving two locations to locations that could overlap, so we need a parallel
220     // move resolver.
221     InvokeRuntimeCallingConvention calling_convention;
222     codegen->EmitParallelMoves(locations->InAt(0),
223                                LocationFrom(calling_convention.GetRegisterAt(0)),
224                                DataType::Type::kInt32,
225                                locations->InAt(1),
226                                LocationFrom(calling_convention.GetRegisterAt(1)),
227                                DataType::Type::kInt32);
228     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
229         ? kQuickThrowStringBounds
230         : kQuickThrowArrayBounds;
231     arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
232     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
233     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
234   }
235 
236   bool IsFatal() const override { return true; }
237 
238   const char* GetDescription() const override { return "BoundsCheckSlowPathARM64"; }
239 
240  private:
241   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
242 };
243 
244 class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
245  public:
246   explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}
247 
248   void EmitNativeCode(CodeGenerator* codegen) override {
249     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
250     __ Bind(GetEntryLabel());
251     arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
252     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
253   }
254 
255   bool IsFatal() const override { return true; }
256 
257   const char* GetDescription() const override { return "DivZeroCheckSlowPathARM64"; }
258 
259  private:
260   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
261 };
262 
263 class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
264  public:
265   LoadClassSlowPathARM64(HLoadClass* cls, HInstruction* at)
266       : SlowPathCodeARM64(at), cls_(cls) {
267     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
268     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
269   }
270 
271   void EmitNativeCode(CodeGenerator* codegen) override {
272     LocationSummary* locations = instruction_->GetLocations();
273     Location out = locations->Out();
274     const uint32_t dex_pc = instruction_->GetDexPc();
275     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
276     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
277 
278     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
279     __ Bind(GetEntryLabel());
280     SaveLiveRegisters(codegen, locations);
281 
282     InvokeRuntimeCallingConvention calling_convention;
283     if (must_resolve_type) {
284       DCHECK(IsSameDexFile(cls_->GetDexFile(), arm64_codegen->GetGraph()->GetDexFile()) ||
285              arm64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
286              ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
287                              &cls_->GetDexFile()));
288       dex::TypeIndex type_index = cls_->GetTypeIndex();
289       __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
290       if (cls_->NeedsAccessCheck()) {
291         CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
292         arm64_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
293       } else {
294         CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
295         arm64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
296       }
297       // If we also must_do_clinit, the resolved type is now in the correct register.
298     } else {
299       DCHECK(must_do_clinit);
300       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
301       arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)),
302                                   source,
303                                   cls_->GetType());
304     }
305     if (must_do_clinit) {
306       arm64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
307       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
308     }
309 
310     // Move the class to the desired location.
311     if (out.IsValid()) {
312       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
313       DataType::Type type = instruction_->GetType();
314       arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
315     }
316     RestoreLiveRegisters(codegen, locations);
317     __ B(GetExitLabel());
318   }
319 
320   const char* GetDescription() const override { return "LoadClassSlowPathARM64"; }
321 
322  private:
323   // The class this slow path will load.
324   HLoadClass* const cls_;
325 
326   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
327 };
328 
329 class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
330  public:
331   explicit LoadStringSlowPathARM64(HLoadString* instruction)
332       : SlowPathCodeARM64(instruction) {}
333 
334   void EmitNativeCode(CodeGenerator* codegen) override {
335     LocationSummary* locations = instruction_->GetLocations();
336     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
337     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
338 
339     __ Bind(GetEntryLabel());
340     SaveLiveRegisters(codegen, locations);
341 
342     InvokeRuntimeCallingConvention calling_convention;
343     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
344     __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
345     arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
346     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
347     DataType::Type type = instruction_->GetType();
348     arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
349 
350     RestoreLiveRegisters(codegen, locations);
351 
352     __ B(GetExitLabel());
353   }
354 
355   const char* GetDescription() const override { return "LoadStringSlowPathARM64"; }
356 
357  private:
358   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
359 };
360 
361 class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
362  public:
363   explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}
364 
365   void EmitNativeCode(CodeGenerator* codegen) override {
366     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
367     __ Bind(GetEntryLabel());
368     if (instruction_->CanThrowIntoCatchBlock()) {
369       // Live registers will be restored in the catch block if caught.
370       SaveLiveRegisters(codegen, instruction_->GetLocations());
371     }
372     arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
373                                  instruction_,
374                                  instruction_->GetDexPc(),
375                                  this);
376     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
377   }
378 
379   bool IsFatal() const override { return true; }
380 
381   const char* GetDescription() const override { return "NullCheckSlowPathARM64"; }
382 
383  private:
384   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
385 };
386 
387 class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
388  public:
389   SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
390       : SlowPathCodeARM64(instruction), successor_(successor) {}
391 
392   void EmitNativeCode(CodeGenerator* codegen) override {
393     LocationSummary* locations = instruction_->GetLocations();
394     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
395     __ Bind(GetEntryLabel());
396     SaveLiveRegisters(codegen, locations);  // Only saves live vector regs for SIMD.
397     arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
398     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
399     RestoreLiveRegisters(codegen, locations);  // Only restores live vector regs for SIMD.
400     if (successor_ == nullptr) {
401       __ B(GetReturnLabel());
402     } else {
403       __ B(arm64_codegen->GetLabelOf(successor_));
404     }
405   }
406 
407   vixl::aarch64::Label* GetReturnLabel() {
408     DCHECK(successor_ == nullptr);
409     return &return_label_;
410   }
411 
412   HBasicBlock* GetSuccessor() const {
413     return successor_;
414   }
415 
416   const char* GetDescription() const override { return "SuspendCheckSlowPathARM64"; }
417 
418  private:
419   // If not null, the block to branch to after the suspend check.
420   HBasicBlock* const successor_;
421 
422   // If `successor_` is null, the label to branch to after the suspend check.
423   vixl::aarch64::Label return_label_;
424 
425   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
426 };
427 
428 class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
429  public:
430   TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
431       : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}
432 
433   void EmitNativeCode(CodeGenerator* codegen) override {
434     LocationSummary* locations = instruction_->GetLocations();
435 
436     DCHECK(instruction_->IsCheckCast()
437            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
438     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
439     uint32_t dex_pc = instruction_->GetDexPc();
440 
441     __ Bind(GetEntryLabel());
442 
443     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
444       SaveLiveRegisters(codegen, locations);
445     }
446 
447     // We're moving two locations to locations that could overlap, so we need a parallel
448     // move resolver.
449     InvokeRuntimeCallingConvention calling_convention;
450     codegen->EmitParallelMoves(locations->InAt(0),
451                                LocationFrom(calling_convention.GetRegisterAt(0)),
452                                DataType::Type::kReference,
453                                locations->InAt(1),
454                                LocationFrom(calling_convention.GetRegisterAt(1)),
455                                DataType::Type::kReference);
456     if (instruction_->IsInstanceOf()) {
457       arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
458       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
459       DataType::Type ret_type = instruction_->GetType();
460       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
461       arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
462     } else {
463       DCHECK(instruction_->IsCheckCast());
464       arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
465       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
466     }
467 
468     if (!is_fatal_) {
469       RestoreLiveRegisters(codegen, locations);
470       __ B(GetExitLabel());
471     }
472   }
473 
474   const char* GetDescription() const override { return "TypeCheckSlowPathARM64"; }
475   bool IsFatal() const override { return is_fatal_; }
476 
477  private:
478   const bool is_fatal_;
479 
480   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
481 };
482 
483 class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
484  public:
485   explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
486       : SlowPathCodeARM64(instruction) {}
487 
488   void EmitNativeCode(CodeGenerator* codegen) override {
489     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
490     __ Bind(GetEntryLabel());
491     LocationSummary* locations = instruction_->GetLocations();
492     SaveLiveRegisters(codegen, locations);
493     InvokeRuntimeCallingConvention calling_convention;
494     __ Mov(calling_convention.GetRegisterAt(0),
495            static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
496     arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
497     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
498   }
499 
500   const char* GetDescription() const override { return "DeoptimizationSlowPathARM64"; }
501 
502  private:
503   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
504 };
505 
506 class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
507  public:
508   explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}
509 
510   void EmitNativeCode(CodeGenerator* codegen) override {
511     LocationSummary* locations = instruction_->GetLocations();
512     __ Bind(GetEntryLabel());
513     SaveLiveRegisters(codegen, locations);
514 
515     InvokeRuntimeCallingConvention calling_convention;
516     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
517     parallel_move.AddMove(
518         locations->InAt(0),
519         LocationFrom(calling_convention.GetRegisterAt(0)),
520         DataType::Type::kReference,
521         nullptr);
522     parallel_move.AddMove(
523         locations->InAt(1),
524         LocationFrom(calling_convention.GetRegisterAt(1)),
525         DataType::Type::kInt32,
526         nullptr);
527     parallel_move.AddMove(
528         locations->InAt(2),
529         LocationFrom(calling_convention.GetRegisterAt(2)),
530         DataType::Type::kReference,
531         nullptr);
532     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
533 
534     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
535     arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
536     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
537     RestoreLiveRegisters(codegen, locations);
538     __ B(GetExitLabel());
539   }
540 
541   const char* GetDescription() const override { return "ArraySetSlowPathARM64"; }
542 
543  private:
544   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
545 };
546 
547 void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
548   uint32_t num_entries = switch_instr_->GetNumEntries();
549   DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
550 
551   // We are about to use the assembler to place literals directly. Make sure we have enough
552   // underlying code buffer space and that the jump table has been generated with the right size.
553   EmissionCheckScope scope(codegen->GetVIXLAssembler(),
554                            num_entries * sizeof(int32_t),
555                            CodeBufferCheckScope::kExactSize);
556 
557   __ Bind(&table_start_);
558   const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
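  // Each entry placed below is a 32-bit offset relative to table_start_. The packed-switch
  // dispatch sequence that consumes the table (emitted elsewhere in this file, outside this
  // excerpt) is expected to load the selected entry and add it to the table address to form the
  // branch target.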
559   for (uint32_t i = 0; i < num_entries; i++) {
560     vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
561     DCHECK(target_label->IsBound());
562     ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
563     DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
564     DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
565     Literal<int32_t> literal(jump_offset);
566     __ place(&literal);
567   }
568 }
569 
570 // Slow path generating a read barrier for a heap reference.
571 class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
572  public:
573   ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
574                                            Location out,
575                                            Location ref,
576                                            Location obj,
577                                            uint32_t offset,
578                                            Location index)
579       : SlowPathCodeARM64(instruction),
580         out_(out),
581         ref_(ref),
582         obj_(obj),
583         offset_(offset),
584         index_(index) {
585     DCHECK(gUseReadBarrier);
586     // If `obj` is equal to `out` or `ref`, it means the initial object
587     // has been overwritten by (or after) the heap object reference load
588     // to be instrumented, e.g.:
589     //
590     //   __ Ldr(out, HeapOperand(out, class_offset);
591     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
592     //
593     // In that case, we have lost the information about the original
594     // object, and the emitted read barrier cannot work properly.
595     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
596     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
597   }
598 
599   void EmitNativeCode(CodeGenerator* codegen) override {
600     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
601     LocationSummary* locations = instruction_->GetLocations();
602     DataType::Type type = DataType::Type::kReference;
603     DCHECK(locations->CanCall());
604     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
605     DCHECK(instruction_->IsInstanceFieldGet() ||
606            instruction_->IsPredicatedInstanceFieldGet() ||
607            instruction_->IsStaticFieldGet() ||
608            instruction_->IsArrayGet() ||
609            instruction_->IsInstanceOf() ||
610            instruction_->IsCheckCast() ||
611            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
612         << "Unexpected instruction in read barrier for heap reference slow path: "
613         << instruction_->DebugName();
614     // The read barrier instrumentation of object ArrayGet
615     // instructions does not support the HIntermediateAddress
616     // instruction.
617     DCHECK(!(instruction_->IsArrayGet() &&
618              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
619 
620     __ Bind(GetEntryLabel());
621 
622     SaveLiveRegisters(codegen, locations);
623 
624     // We may have to change the index's value, but as `index_` is a
625     // constant member (like other "inputs" of this slow path),
626     // we introduce a copy of it, `index`.
627     Location index = index_;
628     if (index_.IsValid()) {
629       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
630       if (instruction_->IsArrayGet()) {
631         // Compute the actual memory offset and store it in `index`.
632         Register index_reg = RegisterFrom(index_, DataType::Type::kInt32);
633         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
634         if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
635           // We are about to change the value of `index_reg` (see the
636           // calls to vixl::MacroAssembler::Lsl and
637           // vixl::MacroAssembler::Mov below), but it has
638           // not been saved by the previous call to
639           // art::SlowPathCode::SaveLiveRegisters, as it is a
640           // callee-save register --
641           // art::SlowPathCode::SaveLiveRegisters does not consider
642           // callee-save registers, as it has been designed with the
643           // assumption that callee-save registers are supposed to be
644           // handled by the called function.  So, as a callee-save
645           // register, `index_reg` _would_ eventually be saved onto
646           // the stack, but it would be too late: we would have
647           // changed its value earlier.  Therefore, we manually save
648           // it here into another freely available register,
649           // `free_reg`, chosen of course among the caller-save
650           // registers (as a callee-save `free_reg` register would
651           // exhibit the same problem).
652           //
653           // Note we could have requested a temporary register from
654           // the register allocator instead; but we prefer not to, as
655           // this is a slow path, and we know we can find a
656           // caller-save register that is available.
657           Register free_reg = FindAvailableCallerSaveRegister(codegen);
658           __ Mov(free_reg.W(), index_reg);
659           index_reg = free_reg;
660           index = LocationFrom(index_reg);
661         } else {
662           // The initial register stored in `index_` has already been
663           // saved in the call to art::SlowPathCode::SaveLiveRegisters
664           // (as it is not a callee-save register), so we can freely
665           // use it.
666         }
667         // Shifting the index value contained in `index_reg` by the scale
668         // factor (2) cannot overflow in practice, as the runtime is
669         // unable to allocate object arrays with a size larger than
670         // 2^26 - 1 (that is, 2^28 - 4 bytes).
671         __ Lsl(index_reg, index_reg, DataType::SizeShift(type));
672         static_assert(
673             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
674             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
675         __ Add(index_reg, index_reg, Operand(offset_));
676       } else {
677         // In the case of the following intrinsics `index_` is not shifted by a scale factor of 2
678         // (as in the case of ArrayGet), as it is actually an offset to an object field within an
679         // object.
680         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
681         DCHECK(instruction_->GetLocations()->Intrinsified());
682         HInvoke* invoke = instruction_->AsInvoke();
683         DCHECK(IsUnsafeGetObject(invoke) ||
684                IsVarHandleGet(invoke) ||
685                IsUnsafeCASObject(invoke) ||
686                IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
687         DCHECK_EQ(offset_, 0u);
688         DCHECK(index_.IsRegister());
689       }
690     }
691 
692     // We're moving two or three locations to locations that could
693     // overlap, so we need a parallel move resolver.
694     InvokeRuntimeCallingConvention calling_convention;
695     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
696     parallel_move.AddMove(ref_,
697                           LocationFrom(calling_convention.GetRegisterAt(0)),
698                           type,
699                           nullptr);
700     parallel_move.AddMove(obj_,
701                           LocationFrom(calling_convention.GetRegisterAt(1)),
702                           type,
703                           nullptr);
704     if (index.IsValid()) {
705       parallel_move.AddMove(index,
706                             LocationFrom(calling_convention.GetRegisterAt(2)),
707                             DataType::Type::kInt32,
708                             nullptr);
709       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
710     } else {
711       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
712       arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
713     }
714     arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
715                                  instruction_,
716                                  instruction_->GetDexPc(),
717                                  this);
718     CheckEntrypointTypes<
719         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
720     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
721 
722     RestoreLiveRegisters(codegen, locations);
723 
724     __ B(GetExitLabel());
725   }
726 
727   const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
728 
729  private:
730   Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
731     size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
732     size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
733     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
734       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
735         return Register(VIXLRegCodeFromART(i), kXRegSize);
736       }
737     }
738     // We shall never fail to find a free caller-save register, as
739     // there are more than two core caller-save registers on ARM64
740     // (meaning it is possible to find one which is different from
741     // `ref` and `obj`).
742     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
743     LOG(FATAL) << "Could not find a free register";
744     UNREACHABLE();
745   }
746 
747   const Location out_;
748   const Location ref_;
749   const Location obj_;
750   const uint32_t offset_;
751   // An additional location containing an index to an array.
752   // Only used for HArrayGet and the UnsafeGetObject &
753   // UnsafeGetObjectVolatile intrinsics.
754   const Location index_;
755 
756   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
757 };
758 
759 // Slow path generating a read barrier for a GC root.
760 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
761  public:
762   ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
763       : SlowPathCodeARM64(instruction), out_(out), root_(root) {
764     DCHECK(gUseReadBarrier);
765   }
766 
767   void EmitNativeCode(CodeGenerator* codegen) override {
768     LocationSummary* locations = instruction_->GetLocations();
769     DataType::Type type = DataType::Type::kReference;
770     DCHECK(locations->CanCall());
771     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
772     DCHECK(instruction_->IsLoadClass() ||
773            instruction_->IsLoadString() ||
774            (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
775         << "Unexpected instruction in read barrier for GC root slow path: "
776         << instruction_->DebugName();
777 
778     __ Bind(GetEntryLabel());
779     SaveLiveRegisters(codegen, locations);
780 
781     InvokeRuntimeCallingConvention calling_convention;
782     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
783     // The argument of the ReadBarrierForRootSlow is not a managed
784     // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
785     // thus we need a 64-bit move here, and we cannot use
786     //
787     //   arm64_codegen->MoveLocation(
788     //       LocationFrom(calling_convention.GetRegisterAt(0)),
789     //       root_,
790     //       type);
791     //
792     // which would emit a 32-bit move, as `type` is a (32-bit wide)
793     // reference type (`DataType::Type::kReference`).
794     __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
795     arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
796                                  instruction_,
797                                  instruction_->GetDexPc(),
798                                  this);
799     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
800     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
801 
802     RestoreLiveRegisters(codegen, locations);
803     __ B(GetExitLabel());
804   }
805 
806   const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARM64"; }
807 
808  private:
809   const Location out_;
810   const Location root_;
811 
812   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
813 };
814 
815 class MethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 {
816  public:
817   explicit MethodEntryExitHooksSlowPathARM64(HInstruction* instruction)
818       : SlowPathCodeARM64(instruction) {}
819 
820   void EmitNativeCode(CodeGenerator* codegen) override {
821     LocationSummary* locations = instruction_->GetLocations();
822     QuickEntrypointEnum entry_point =
823         (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
824     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
825     __ Bind(GetEntryLabel());
826     SaveLiveRegisters(codegen, locations);
827     if (instruction_->IsMethodExitHook()) {
828       __ Mov(vixl::aarch64::x4, arm64_codegen->GetFrameSize());
829     }
830     arm64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
831     RestoreLiveRegisters(codegen, locations);
832     __ B(GetExitLabel());
833   }
834 
835   const char* GetDescription() const override {
836     return "MethodEntryExitHooksSlowPath";
837   }
838 
839  private:
840   DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathARM64);
841 };
842 
843 class CompileOptimizedSlowPathARM64 : public SlowPathCodeARM64 {
844  public:
845   CompileOptimizedSlowPathARM64() : SlowPathCodeARM64(/* instruction= */ nullptr) {}
846 
847   void EmitNativeCode(CodeGenerator* codegen) override {
848     uint32_t entrypoint_offset =
849         GetThreadOffset<kArm64PointerSize>(kQuickCompileOptimized).Int32Value();
850     __ Bind(GetEntryLabel());
851     __ Ldr(lr, MemOperand(tr, entrypoint_offset));
852     // Note: we don't record the call here (and therefore don't generate a stack
853     // map), as the entrypoint should never be suspended.
854     __ Blr(lr);
855     __ B(GetExitLabel());
856   }
857 
858   const char* GetDescription() const override {
859     return "CompileOptimizedSlowPath";
860   }
861 
862  private:
863   DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathARM64);
864 };
865 
866 #undef __
867 
868 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
869   Location next_location;
870   if (type == DataType::Type::kVoid) {
871     LOG(FATAL) << "Unreachable type " << type;
872   }
873 
874   if (DataType::IsFloatingPointType(type) &&
875       (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
876     next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
877   } else if (!DataType::IsFloatingPointType(type) &&
878              (gp_index_ < calling_convention.GetNumberOfRegisters())) {
879     next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
880   } else {
881     size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
882     next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
883                                                 : Location::StackSlot(stack_offset);
884   }
885 
886   // Space on the stack is reserved for all arguments.
887   stack_index_ += DataType::Is64BitType(type) ? 2 : 1;
888   return next_location;
889 }
890 
891 Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
892   return LocationFrom(kArtMethodRegister);
893 }
894 
895 Location CriticalNativeCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
896   DCHECK_NE(type, DataType::Type::kReference);
897 
898   Location location = Location::NoLocation();
899   if (DataType::IsFloatingPointType(type)) {
900     if (fpr_index_ < kParameterFPRegistersLength) {
901       location = LocationFrom(kParameterFPRegisters[fpr_index_]);
902       ++fpr_index_;
903     }
904   } else {
905     // Native ABI uses the same registers as managed, except that the method register x0
906     // is a normal argument.
907     if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
908       location = LocationFrom(gpr_index_ == 0u ? x0 : kParameterCoreRegisters[gpr_index_ - 1u]);
909       ++gpr_index_;
910     }
911   }
912   if (location.IsInvalid()) {
913     if (DataType::Is64BitType(type)) {
914       location = Location::DoubleStackSlot(stack_offset_);
915     } else {
916       location = Location::StackSlot(stack_offset_);
917     }
918     stack_offset_ += kFramePointerSize;
919 
920     if (for_register_allocation_) {
921       location = Location::Any();
922     }
923   }
924   return location;
925 }
926 
927 Location CriticalNativeCallingConventionVisitorARM64::GetReturnLocation(DataType::Type type) const {
928   // We perform conversion to the managed ABI return register after the call if needed.
929   InvokeDexCallingConventionVisitorARM64 dex_calling_convention;
930   return dex_calling_convention.GetReturnLocation(type);
931 }
932 
933 Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const {
934   // Pass the method in the hidden argument x15.
935   return Location::RegisterLocation(x15.GetCode());
936 }
937 
938 namespace detail {
939 // Mark which intrinsics we don't have handcrafted code for.
940 template <Intrinsics T>
941 struct IsUnimplemented {
942   bool is_unimplemented = false;
943 };
944 
945 #define TRUE_OVERRIDE(Name)                     \
946   template <>                                   \
947   struct IsUnimplemented<Intrinsics::k##Name> { \
948     bool is_unimplemented = true;               \
949   };
950 UNIMPLEMENTED_INTRINSIC_LIST_ARM64(TRUE_OVERRIDE)
951 #undef TRUE_OVERRIDE
952 
953 #include "intrinsics_list.h"
954 static constexpr bool kIsIntrinsicUnimplemented[] = {
955   false,  // kNone
956 #define IS_UNIMPLEMENTED(Intrinsic, ...) \
957   IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
958   INTRINSICS_LIST(IS_UNIMPLEMENTED)
959 #undef IS_UNIMPLEMENTED
960 };
961 #undef INTRINSICS_LIST
962 
963 }  // namespace detail
964 
965 CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
966                                        const CompilerOptions& compiler_options,
967                                        OptimizingCompilerStats* stats)
968     : CodeGenerator(graph,
969                     kNumberOfAllocatableRegisters,
970                     kNumberOfAllocatableFPRegisters,
971                     kNumberOfAllocatableRegisterPairs,
972                     callee_saved_core_registers.GetList(),
973                     callee_saved_fp_registers.GetList(),
974                     compiler_options,
975                     stats,
976                     ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
977       block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
978       jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
979       location_builder_neon_(graph, this),
980       instruction_visitor_neon_(graph, this),
981       location_builder_sve_(graph, this),
982       instruction_visitor_sve_(graph, this),
983       move_resolver_(graph->GetAllocator(), this),
984       assembler_(graph->GetAllocator(),
985                  compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()),
986       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
987       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
988       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
989       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
990       public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
991       package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
992       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
993       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
994       boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
995       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
996       call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
997       baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
998       uint32_literals_(std::less<uint32_t>(),
999                        graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1000       uint64_literals_(std::less<uint64_t>(),
1001                        graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1002       jit_string_patches_(StringReferenceValueComparator(),
1003                           graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1004       jit_class_patches_(TypeReferenceValueComparator(),
1005                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1006       jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
1007                                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1008   // Save the link register (containing the return address) to mimic Quick.
1009   AddAllocatedRegister(LocationFrom(lr));
1010 
1011   bool use_sve = ShouldUseSVE();
1012   if (use_sve) {
1013     location_builder_ = &location_builder_sve_;
1014     instruction_visitor_ = &instruction_visitor_sve_;
1015   } else {
1016     location_builder_ = &location_builder_neon_;
1017     instruction_visitor_ = &instruction_visitor_neon_;
1018   }
1019 }
1020 
1021 bool CodeGeneratorARM64::ShouldUseSVE() const {
1022   return GetInstructionSetFeatures().HasSVE();
1023 }
1024 
1025 size_t CodeGeneratorARM64::GetSIMDRegisterWidth() const {
1026   return SupportsPredicatedSIMD()
1027       ? GetInstructionSetFeatures().GetSVEVectorLength() / kBitsPerByte
1028       : vixl::aarch64::kQRegSizeInBytes;
1029 }
1030 
1031 #define __ GetVIXLAssembler()->
1032 
1033 void CodeGeneratorARM64::EmitJumpTables() {
1034   for (auto&& jump_table : jump_tables_) {
1035     jump_table->EmitTable(this);
1036   }
1037 }
1038 
1039 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
1040   EmitJumpTables();
1041 
1042   // Emit JIT baker read barrier slow paths.
1043   DCHECK(GetCompilerOptions().IsJitCompiler() || jit_baker_read_barrier_slow_paths_.empty());
1044   for (auto& entry : jit_baker_read_barrier_slow_paths_) {
1045     uint32_t encoded_data = entry.first;
1046     vixl::aarch64::Label* slow_path_entry = &entry.second.label;
1047     __ Bind(slow_path_entry);
1048     CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
1049   }
1050 
1051   // Ensure we emit the literal pool.
1052   __ FinalizeCode();
1053 
1054   CodeGenerator::Finalize(allocator);
1055 
1056   // Verify Baker read barrier linker patches.
1057   if (kIsDebugBuild) {
1058     ArrayRef<const uint8_t> code = allocator->GetMemory();
1059     for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
1060       DCHECK(info.label.IsBound());
1061       uint32_t literal_offset = info.label.GetLocation();
1062       DCHECK_ALIGNED(literal_offset, 4u);
1063 
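      // Reassemble each 32-bit instruction word from the little-endian code buffer.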
1064       auto GetInsn = [&code](uint32_t offset) {
1065         DCHECK_ALIGNED(offset, 4u);
1066         return
1067             (static_cast<uint32_t>(code[offset + 0]) << 0) +
1068             (static_cast<uint32_t>(code[offset + 1]) << 8) +
1069             (static_cast<uint32_t>(code[offset + 2]) << 16)+
1070             (static_cast<uint32_t>(code[offset + 3]) << 24);
1071       };
1072 
1073       const uint32_t encoded_data = info.custom_data;
1074       BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
1075       // Check that the next instruction matches the expected LDR.
1076       switch (kind) {
1077         case BakerReadBarrierKind::kField:
1078         case BakerReadBarrierKind::kAcquire: {
1079           DCHECK_GE(code.size() - literal_offset, 8u);
1080           uint32_t next_insn = GetInsn(literal_offset + 4u);
1081           CheckValidReg(next_insn & 0x1fu);  // Check destination register.
1082           const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
1083           if (kind == BakerReadBarrierKind::kField) {
1084             // LDR (immediate) with correct base_reg.
1085             CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5));
1086           } else {
1087             DCHECK(kind == BakerReadBarrierKind::kAcquire);
1088             // LDAR with correct base_reg.
1089             CHECK_EQ(next_insn & 0xffffffe0u, 0x88dffc00u | (base_reg << 5));
1090           }
1091           break;
1092         }
1093         case BakerReadBarrierKind::kArray: {
1094           DCHECK_GE(code.size() - literal_offset, 8u);
1095           uint32_t next_insn = GetInsn(literal_offset + 4u);
1096           // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
1097           // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
1098           CheckValidReg(next_insn & 0x1fu);  // Check destination register.
1099           const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
1100           CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5));
1101           CheckValidReg((next_insn >> 16) & 0x1f);  // Check index register
1102           break;
1103         }
1104         case BakerReadBarrierKind::kGcRoot: {
1105           DCHECK_GE(literal_offset, 4u);
1106           uint32_t prev_insn = GetInsn(literal_offset - 4u);
1107           const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
1108           // Usually LDR (immediate) with correct root_reg but
1109           // we may have a "MOV marked, old_value" for intrinsic CAS.
1110           if ((prev_insn & 0xffe0ffff) != (0x2a0003e0 | root_reg)) {    // MOV?
1111             CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg);  // LDR?
1112           }
1113           break;
1114         }
1115         default:
1116           LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
1117           UNREACHABLE();
1118       }
1119     }
1120   }
1121 }
1122 
1123 void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
1124   // Note: There are 6 kinds of moves:
1125   // 1. constant -> GPR/FPR (non-cycle)
1126   // 2. constant -> stack (non-cycle)
1127   // 3. GPR/FPR -> GPR/FPR
1128   // 4. GPR/FPR -> stack
1129   // 5. stack -> GPR/FPR
1130   // 6. stack -> stack (non-cycle)
1131   // Cases 1, 2, and 6 should never be included in a dependency cycle on ARM64. For cases 3, 4,
1132   // and 5, VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no
1133   // intersecting cycles on ARM64, so we always have 1 GPR and 1 FPR available as VIXL temps to
1134   // resolve the dependency.
1135   vixl_temps_.Open(GetVIXLAssembler());
1136 }
1137 
1138 void ParallelMoveResolverARM64::FinishEmitNativeCode() {
1139   vixl_temps_.Close();
1140 }
1141 
1142 Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
1143   DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister
1144          || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot
1145          || kind == Location::kSIMDStackSlot);
1146   kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot)
1147       ? Location::kFpuRegister
1148       : Location::kRegister;
1149   Location scratch = GetScratchLocation(kind);
1150   if (!scratch.Equals(Location::NoLocation())) {
1151     return scratch;
1152   }
1153   // Allocate from VIXL temp registers.
1154   if (kind == Location::kRegister) {
1155     scratch = LocationFrom(vixl_temps_.AcquireX());
1156   } else {
1157     DCHECK_EQ(kind, Location::kFpuRegister);
1158     scratch = codegen_->GetGraph()->HasSIMD()
1159         ? codegen_->GetInstructionCodeGeneratorArm64()->AllocateSIMDScratchLocation(&vixl_temps_)
1160         : LocationFrom(vixl_temps_.AcquireD());
1161   }
1162   AddScratchLocation(scratch);
1163   return scratch;
1164 }
1165 
1166 void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
1167   if (loc.IsRegister()) {
1168     vixl_temps_.Release(XRegisterFrom(loc));
1169   } else {
1170     DCHECK(loc.IsFpuRegister());
1171     if (codegen_->GetGraph()->HasSIMD()) {
1172       codegen_->GetInstructionCodeGeneratorArm64()->FreeSIMDScratchLocation(loc, &vixl_temps_);
1173     } else {
1174       vixl_temps_.Release(DRegisterFrom(loc));
1175     }
1176   }
1177   RemoveScratchLocation(loc);
1178 }
1179 
1180 void ParallelMoveResolverARM64::EmitMove(size_t index) {
1181   MoveOperands* move = moves_[index];
1182   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid);
1183 }
1184 
1185 void LocationsBuilderARM64::VisitMethodExitHook(HMethodExitHook* method_hook) {
1186   LocationSummary* locations = new (GetGraph()->GetAllocator())
1187       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1188   DataType::Type return_type = method_hook->InputAt(0)->GetType();
1189   locations->SetInAt(0, ARM64ReturnLocation(return_type));
1190 }
1191 
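// Shared helper for method entry/exit hooks: exit hooks first test the ShouldDeoptimize flag on
// the stack, then both variants test the instrumentation's "have listeners" byte and branch to
// MethodEntryExitHooksSlowPathARM64 if either check fires.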
1192 void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* instruction) {
1193   MacroAssembler* masm = GetVIXLAssembler();
1194   UseScratchRegisterScope temps(masm);
1195   Register temp = temps.AcquireX();
1196   Register value = temps.AcquireW();
1197 
1198   SlowPathCodeARM64* slow_path =
1199       new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARM64(instruction);
1200   codegen_->AddSlowPath(slow_path);
1201 
1202   if (instruction->IsMethodExitHook()) {
1203     // Check if we are required to check if the caller needs a deoptimization. Strictly
1204     // speaking, checking the CheckCallerForDeopt bit would suffice, but it is faster to check
1205     // for any non-zero value. The kCHA bit isn't used in debuggable runtimes because CHA
1206     // optimization is disabled there, and the other bit is used when this method itself requires
1207     // a deoptimization due to redefinition, so it is safe to just check for a non-zero value here.
1208     __ Ldr(value, MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1209     __ Cbnz(value, slow_path->GetEntryLabel());
1210   }
1211 
1212   uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
1213   MemberOffset offset = instruction->IsMethodExitHook() ?
1214       instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
1215       instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
1216   __ Mov(temp, address + offset.Int32Value());
1217   __ Ldrb(value, MemOperand(temp, 0));
1218   __ Cbnz(value, slow_path->GetEntryLabel());
1219   __ Bind(slow_path->GetExitLabel());
1220 }
1221 
1222 void InstructionCodeGeneratorARM64::VisitMethodExitHook(HMethodExitHook* instruction) {
1223   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1224   DCHECK(codegen_->RequiresCurrentMethod());
1225   GenerateMethodEntryExitHook(instruction);
1226 }
1227 
1228 void LocationsBuilderARM64::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1229   new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1230 }
1231 
1232 void InstructionCodeGeneratorARM64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
1233   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1234   DCHECK(codegen_->RequiresCurrentMethod());
1235   GenerateMethodEntryExitHook(instruction);
1236 }
1237 
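// Emits code that decrements the method's hotness counter when counting hotness in compiled code
// and, for baseline compilation outside the AOT compiler, decrements the ProfilingInfo baseline
// counter, branching to CompileOptimizedSlowPathARM64 once that counter has reached zero.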
1238 void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
1239   MacroAssembler* masm = GetVIXLAssembler();
1240   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1241     UseScratchRegisterScope temps(masm);
1242     Register counter = temps.AcquireX();
1243     Register method = is_frame_entry ? kArtMethodRegister : temps.AcquireX();
1244     if (!is_frame_entry) {
1245       __ Ldr(method, MemOperand(sp, 0));
1246     }
1247     __ Ldrh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
1248     vixl::aarch64::Label done;
1249     DCHECK_EQ(0u, interpreter::kNterpHotnessValue);
1250     __ Cbz(counter, &done);
1251     __ Add(counter, counter, -1);
1252     __ Strh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
1253     __ Bind(&done);
1254   }
1255 
1256   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1257     SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathARM64();
1258     AddSlowPath(slow_path);
1259     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1260     DCHECK(info != nullptr);
1261     DCHECK(!HasEmptyFrame());
1262     uint64_t address = reinterpret_cast64<uint64_t>(info);
1263     vixl::aarch64::Label done;
1264     UseScratchRegisterScope temps(masm);
1265     Register temp = temps.AcquireX();
1266     Register counter = temps.AcquireW();
1267     __ Ldr(temp, DeduplicateUint64Literal(address));
1268     __ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
1269     __ Cbz(counter, slow_path->GetEntryLabel());
1270     __ Add(counter, counter, -1);
1271     __ Strh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
1272     __ Bind(slow_path->GetExitLabel());
1273   }
1274 }
1275 
1276 void CodeGeneratorARM64::GenerateFrameEntry() {
1277   MacroAssembler* masm = GetVIXLAssembler();
1278 
1279   // Check if we need to generate the clinit check. We will jump to the
1280   // resolution stub if the class is not initialized and the executing thread is
1281   // not the thread initializing it.
1282   // We do this before constructing the frame to get the correct stack trace if
1283   // an exception is thrown.
1284   if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
1285     UseScratchRegisterScope temps(masm);
1286     vixl::aarch64::Label resolution;
1287     vixl::aarch64::Label memory_barrier;
1288 
1289     Register temp1 = temps.AcquireW();
1290     Register temp2 = temps.AcquireW();
1291 
1292     // Check if we're visibly initialized.
1293 
1294     // We don't emit a read barrier here to save on code size. We rely on the
1295     // resolution trampoline to do a suspend check before re-entering this code.
1296     __ Ldr(temp1, MemOperand(kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
1297     __ Ldrb(temp2, HeapOperand(temp1, status_byte_offset));
1298     __ Cmp(temp2, shifted_visibly_initialized_value);
1299     __ B(hs, &frame_entry_label_);
1300 
1301     // Check if we're initialized and jump to code that does a memory barrier if
1302     // so.
1303     __ Cmp(temp2, shifted_initialized_value);
1304     __ B(hs, &memory_barrier);
1305 
1306     // Check if we're initializing and the thread initializing is the one
1307     // executing the code.
1308     __ Cmp(temp2, shifted_initializing_value);
1309     __ B(lo, &resolution);
1310 
1311     __ Ldr(temp1, HeapOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
1312     __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArm64PointerSize>().Int32Value()));
1313     __ Cmp(temp1, temp2);
1314     __ B(eq, &frame_entry_label_);
1315     __ Bind(&resolution);
1316 
1317     // Jump to the resolution stub.
1318     ThreadOffset64 entrypoint_offset =
1319         GetThreadOffset<kArm64PointerSize>(kQuickQuickResolutionTrampoline);
1320     __ Ldr(temp1.X(), MemOperand(tr, entrypoint_offset.Int32Value()));
1321     __ Br(temp1.X());
1322 
1323     __ Bind(&memory_barrier);
1324     GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
1325   }
1326   __ Bind(&frame_entry_label_);
1327 
1328   bool do_overflow_check =
1329       FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod();
1330   if (do_overflow_check) {
1331     UseScratchRegisterScope temps(masm);
1332     Register temp = temps.AcquireX();
1333     DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1334     __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kArm64)));
1335     {
1336       // Ensure that between load and RecordPcInfo there are no pools emitted.
1337       ExactAssemblyScope eas(GetVIXLAssembler(),
1338                              kInstructionSize,
1339                              CodeBufferCheckScope::kExactSize);
1340       __ ldr(wzr, MemOperand(temp, 0));
1341       RecordPcInfo(nullptr, 0);
1342     }
1343   }
1344 
1345   if (!HasEmptyFrame()) {
1346     // Stack layout:
1347     //      sp[frame_size - 8]        : lr.
1348     //      ...                       : other preserved core registers.
1349     //      ...                       : other preserved fp registers.
1350     //      ...                       : reserved frame space.
1351     //      sp[0]                     : current method.
1352     int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize());
1353     uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
1354     CPURegList preserved_core_registers = GetFramePreservedCoreRegisters();
1355     DCHECK(!preserved_core_registers.IsEmpty());
1356     uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
1357     CPURegList preserved_fp_registers = GetFramePreservedFPRegisters();
1358 
1359     // Save the current method if we need it, or if using STP reduces code
1360     // size. Note that we do not do this in HCurrentMethod, as the
1361     // instruction might have been removed in the SSA graph.
1362     CPURegister lowest_spill;
1363     if (core_spills_offset == kXRegSizeInBytes) {
1364       // If there is no gap between the method and the lowest core spill, use
1365       // aligned STP pre-index to store both. Max difference is 512. We do
1366       // that to reduce code size even if we do not have to save the method.
1367       DCHECK_LE(frame_size, 512);  // 32 core registers are only 256 bytes.
1368       lowest_spill = preserved_core_registers.PopLowestIndex();
1369       __ Stp(kArtMethodRegister, lowest_spill, MemOperand(sp, -frame_size, PreIndex));
1370     } else if (RequiresCurrentMethod()) {
1371       __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
1372     } else {
1373       __ Claim(frame_size);
1374     }
1375     GetAssembler()->cfi().AdjustCFAOffset(frame_size);
1376     if (lowest_spill.IsValid()) {
1377       GetAssembler()->cfi().RelOffset(DWARFReg(lowest_spill), core_spills_offset);
1378       core_spills_offset += kXRegSizeInBytes;
1379     }
1380     GetAssembler()->SpillRegisters(preserved_core_registers, core_spills_offset);
1381     GetAssembler()->SpillRegisters(preserved_fp_registers, fp_spills_offset);
1382 
1383     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1384       // Initialize should_deoptimize flag to 0.
1385       Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
1386       __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
1387     }
1388   }
1389   MaybeIncrementHotness(/* is_frame_entry= */ true);
1390   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
1391 }
1392 
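// Mirror image of GenerateFrameEntry(): restores the preserved FP and core registers, releases
// the frame (folding the release into an LDP post-index when the method slot is adjacent to the
// lowest core spill) and returns, keeping the CFI state consistent.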
1393 void CodeGeneratorARM64::GenerateFrameExit() {
1394   GetAssembler()->cfi().RememberState();
1395   if (!HasEmptyFrame()) {
1396     int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize());
1397     uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
1398     CPURegList preserved_core_registers = GetFramePreservedCoreRegisters();
1399     DCHECK(!preserved_core_registers.IsEmpty());
1400     uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
1401     CPURegList preserved_fp_registers = GetFramePreservedFPRegisters();
1402 
1403     CPURegister lowest_spill;
1404     if (core_spills_offset == kXRegSizeInBytes) {
1405       // If there is no gap between the method and the lowest core spill, use
1406       // aligned LDP post-index to pop both. Max difference is 504. We do
1407       // that to reduce code size even though the loaded method is unused.
1408       DCHECK_LE(frame_size, 504);  // 32 core registers are only 256 bytes.
1409       lowest_spill = preserved_core_registers.PopLowestIndex();
1410       core_spills_offset += kXRegSizeInBytes;
1411     }
1412     GetAssembler()->UnspillRegisters(preserved_fp_registers, fp_spills_offset);
1413     GetAssembler()->UnspillRegisters(preserved_core_registers, core_spills_offset);
1414     if (lowest_spill.IsValid()) {
1415       __ Ldp(xzr, lowest_spill, MemOperand(sp, frame_size, PostIndex));
1416       GetAssembler()->cfi().Restore(DWARFReg(lowest_spill));
1417     } else {
1418       __ Drop(frame_size);
1419     }
1420     GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
1421   }
1422   __ Ret();
1423   GetAssembler()->cfi().RestoreState();
1424   GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
1425 }
1426 
1427 CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
1428   DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
1429   return CPURegList(CPURegister::kRegister, kXRegSize,
1430                     core_spill_mask_);
1431 }
1432 
1433 CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
1434   DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
1435                                          GetNumberOfFloatingPointRegisters()));
1436   return CPURegList(CPURegister::kVRegister, kDRegSize,
1437                     fpu_spill_mask_);
1438 }
1439 
1440 void CodeGeneratorARM64::Bind(HBasicBlock* block) {
1441   __ Bind(GetLabelOf(block));
1442 }
1443 
1444 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
1445   DCHECK(location.IsRegister());
1446   __ Mov(RegisterFrom(location, DataType::Type::kInt32), value);
1447 }
1448 
1449 void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1450   if (location.IsRegister()) {
1451     locations->AddTemp(location);
1452   } else {
1453     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1454   }
1455 }
1456 
1457 void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool emit_null_check) {
1458   UseScratchRegisterScope temps(GetVIXLAssembler());
1459   Register card = temps.AcquireX();
1460   Register temp = temps.AcquireW();   // Index within the CardTable - 32bit.
1461   vixl::aarch64::Label done;
1462   if (emit_null_check) {
1463     __ Cbz(value, &done);
1464   }
1465   // Load the address of the card table into `card`.
1466   __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
1467   // Calculate the offset (in the card table) of the card corresponding to
1468   // `object`.
1469   __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
1470   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
1471   // `object`'s card.
1472   //
1473   // Register `card` contains the address of the card table. Note that the card
1474   // table's base is biased during its creation so that it always starts at an
1475   // address whose least-significant byte is equal to `kCardDirty` (see
1476   // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
1477   // below writes the `kCardDirty` (byte) value into the `object`'s card
1478   // (located at `card + object >> kCardShift`).
1479   //
1480   // This dual use of the value in register `card` (1. to calculate the location
1481   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
1482   // (no need to explicitly load `kCardDirty` as an immediate value).
1483   __ Strb(card, MemOperand(card, temp.X()));
1484   if (emit_null_check) {
1485     __ Bind(&done);
1486   }
1487 }
1488 
1489 void CodeGeneratorARM64::SetupBlockedRegisters() const {
1490   // Blocked core registers:
1491   //      lr        : Runtime reserved.
1492   //      tr        : Runtime reserved.
1493   //      mr        : Runtime reserved.
1494   //      ip1       : VIXL core temp.
1495   //      ip0       : VIXL core temp.
1496   //      x18       : Platform register.
1497   //
1498   // Blocked fp registers:
1499   //      d31       : VIXL fp temp.
1500   CPURegList reserved_core_registers = vixl_reserved_core_registers;
1501   reserved_core_registers.Combine(runtime_reserved_core_registers);
1502   while (!reserved_core_registers.IsEmpty()) {
1503     blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
1504   }
1505   blocked_core_registers_[X18] = true;
1506 
1507   CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
1508   while (!reserved_fp_registers.IsEmpty()) {
1509     blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
1510   }
1511 
1512   if (GetGraph()->IsDebuggable()) {
1513     // Stubs do not save callee-save floating point registers. If the graph
1514     // is debuggable, we need to deal with these registers differently. For
1515     // now, just block them.
1516     CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
1517     while (!reserved_fp_registers_debuggable.IsEmpty()) {
1518       blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
1519     }
1520   }
1521 }
1522 
1523 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1524   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1525   __ Str(reg, MemOperand(sp, stack_index));
1526   return kArm64WordSize;
1527 }
1528 
1529 size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1530   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1531   __ Ldr(reg, MemOperand(sp, stack_index));
1532   return kArm64WordSize;
1533 }
1534 
1535 size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
1536                                                      uint32_t reg_id ATTRIBUTE_UNUSED) {
1537   LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
1538              << "use SaveRestoreLiveRegistersHelper";
1539   UNREACHABLE();
1540 }
1541 
1542 size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
1543                                                         uint32_t reg_id ATTRIBUTE_UNUSED) {
1544   LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
1545              << "use SaveRestoreLiveRegistersHelper";
1546   UNREACHABLE();
1547 }
1548 
1549 void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
1550   stream << XRegister(reg);
1551 }
1552 
1553 void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1554   stream << DRegister(reg);
1555 }
1556 
1557 const Arm64InstructionSetFeatures& CodeGeneratorARM64::GetInstructionSetFeatures() const {
1558   return *GetCompilerOptions().GetInstructionSetFeatures()->AsArm64InstructionSetFeatures();
1559 }
1560 
1561 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
1562   if (constant->IsIntConstant()) {
1563     __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
1564   } else if (constant->IsLongConstant()) {
1565     __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
1566   } else if (constant->IsNullConstant()) {
1567     __ Mov(Register(destination), 0);
1568   } else if (constant->IsFloatConstant()) {
1569     __ Fmov(VRegister(destination), constant->AsFloatConstant()->GetValue());
1570   } else {
1571     DCHECK(constant->IsDoubleConstant());
1572     __ Fmov(VRegister(destination), constant->AsDoubleConstant()->GetValue());
1573   }
1574 }
1575 
1576 
1577 static bool CoherentConstantAndType(Location constant, DataType::Type type) {
1578   DCHECK(constant.IsConstant());
1579   HConstant* cst = constant.GetConstant();
1580   return (cst->IsIntConstant() && type == DataType::Type::kInt32) ||
1581          // Null is mapped to a core W register, which we associate with kPrimInt.
1582          (cst->IsNullConstant() && type == DataType::Type::kInt32) ||
1583          (cst->IsLongConstant() && type == DataType::Type::kInt64) ||
1584          (cst->IsFloatConstant() && type == DataType::Type::kFloat32) ||
1585          (cst->IsDoubleConstant() && type == DataType::Type::kFloat64);
1586 }
1587 
1588 // Allocate a scratch register from the VIXL pool, querying first
1589 // the floating-point register pool, and then the core register
1590 // pool. This is essentially a reimplementation of
1591 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
1592 // using a different allocation strategy.
1593 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
1594                                                     vixl::aarch64::UseScratchRegisterScope* temps,
1595                                                     int size_in_bits) {
1596   return masm->GetScratchVRegisterList()->IsEmpty()
1597       ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
1598       : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
1599 }
1600 
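// Central move helper used by the parallel move resolver and the rest of the code generator.
// It infers a valid move from the destination and source kinds, using `dst_type` (kVoid for an
// unspecified type) to choose between 32-bit and 64-bit moves and between core and FP registers.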
1601 void CodeGeneratorARM64::MoveLocation(Location destination,
1602                                       Location source,
1603                                       DataType::Type dst_type) {
1604   if (source.Equals(destination)) {
1605     return;
1606   }
1607 
1608   // A valid move can always be inferred from the destination and source
1609   // locations. When moving from and to a register, the argument type can be
1610   // used to generate 32bit instead of 64bit moves. In debug mode we also
1611   // check the coherency of the locations and the type.
1612   bool unspecified_type = (dst_type == DataType::Type::kVoid);
1613 
1614   if (destination.IsRegister() || destination.IsFpuRegister()) {
1615     if (unspecified_type) {
1616       HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
1617       if (source.IsStackSlot() ||
1618           (src_cst != nullptr && (src_cst->IsIntConstant()
1619                                   || src_cst->IsFloatConstant()
1620                                   || src_cst->IsNullConstant()))) {
1621         // For stack slots and 32bit constants, a 64bit type is appropriate.
1622         dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32;
1623       } else {
1624         // If the source is a double stack slot or a 64bit constant, a 64bit
1625         // type is appropriate. Else the source is a register, and since the
1626         // type has not been specified, we choose a 64bit type to force a 64bit
1627         // move.
1628         dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64;
1629       }
1630     }
1631     DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) ||
1632            (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type)));
1633     CPURegister dst = CPURegisterFrom(destination, dst_type);
1634     if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
1635       DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
1636       __ Ldr(dst, StackOperandFrom(source));
1637     } else if (source.IsSIMDStackSlot()) {
1638       GetInstructionCodeGeneratorArm64()->LoadSIMDRegFromStack(destination, source);
1639     } else if (source.IsConstant()) {
1640       DCHECK(CoherentConstantAndType(source, dst_type));
1641       MoveConstant(dst, source.GetConstant());
1642     } else if (source.IsRegister()) {
1643       if (destination.IsRegister()) {
1644         __ Mov(Register(dst), RegisterFrom(source, dst_type));
1645       } else {
1646         DCHECK(destination.IsFpuRegister());
1647         DataType::Type source_type = DataType::Is64BitType(dst_type)
1648             ? DataType::Type::kInt64
1649             : DataType::Type::kInt32;
1650         __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
1651       }
1652     } else {
1653       DCHECK(source.IsFpuRegister());
1654       if (destination.IsRegister()) {
1655         DataType::Type source_type = DataType::Is64BitType(dst_type)
1656             ? DataType::Type::kFloat64
1657             : DataType::Type::kFloat32;
1658         __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
1659       } else {
1660         DCHECK(destination.IsFpuRegister());
1661         if (GetGraph()->HasSIMD()) {
1662           GetInstructionCodeGeneratorArm64()->MoveSIMDRegToSIMDReg(destination, source);
1663         } else {
1664           __ Fmov(VRegister(dst), FPRegisterFrom(source, dst_type));
1665         }
1666       }
1667     }
1668   } else if (destination.IsSIMDStackSlot()) {
1669     GetInstructionCodeGeneratorArm64()->MoveToSIMDStackSlot(destination, source);
1670   } else {  // The destination is not a register. It must be a stack slot.
1671     DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
1672     if (source.IsRegister() || source.IsFpuRegister()) {
1673       if (unspecified_type) {
1674         if (source.IsRegister()) {
1675           dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64;
1676         } else {
1677           dst_type =
1678               destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64;
1679         }
1680       }
1681       DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) &&
1682              (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type)));
1683       __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination));
1684     } else if (source.IsConstant()) {
1685       DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type))
1686           << source << " " << dst_type;
1687       UseScratchRegisterScope temps(GetVIXLAssembler());
1688       HConstant* src_cst = source.GetConstant();
1689       CPURegister temp;
1690       if (src_cst->IsZeroBitPattern()) {
1691         temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant())
1692             ? Register(xzr)
1693             : Register(wzr);
1694       } else {
1695         if (src_cst->IsIntConstant()) {
1696           temp = temps.AcquireW();
1697         } else if (src_cst->IsLongConstant()) {
1698           temp = temps.AcquireX();
1699         } else if (src_cst->IsFloatConstant()) {
1700           temp = temps.AcquireS();
1701         } else {
1702           DCHECK(src_cst->IsDoubleConstant());
1703           temp = temps.AcquireD();
1704         }
1705         MoveConstant(temp, src_cst);
1706       }
1707       __ Str(temp, StackOperandFrom(destination));
1708     } else {
1709       DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
1710       DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
1711       UseScratchRegisterScope temps(GetVIXLAssembler());
1712       // Use any scratch register (a core or a floating-point one)
1713       // from VIXL scratch register pools as a temporary.
1714       //
1715       // We used to only use the FP scratch register pool, but in some
1716       // rare cases the only register from this pool (D31) would
1717       // already be used (e.g. within a ParallelMove instruction, when
1718       // a move is blocked by another move requiring a scratch FP
1719       // register, which would reserve D31). To prevent this issue, we
1720       // ask for a scratch register of any type (core or FP).
1721       //
1722       // Also, we ask for an FP scratch register first, as the demand
1723       // for scratch core registers is higher. This is why we
1724       // use AcquireFPOrCoreCPURegisterOfSize instead of
1725       // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
1726       // allocates core scratch registers first.
1727       CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
1728           GetVIXLAssembler(),
1729           &temps,
1730           (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
1731       __ Ldr(temp, StackOperandFrom(source));
1732       __ Str(temp, StackOperandFrom(destination));
1733     }
1734   }
1735 }
1736 
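// Emits a plain (non-atomic) load of `type` from `src` into `dst`, using the zero- or
// sign-extending variants (LDRB/LDRSB/LDRH/LDRSH) for sub-word types and LDR otherwise.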
1737 void CodeGeneratorARM64::Load(DataType::Type type,
1738                               CPURegister dst,
1739                               const MemOperand& src) {
1740   switch (type) {
1741     case DataType::Type::kBool:
1742     case DataType::Type::kUint8:
1743       __ Ldrb(Register(dst), src);
1744       break;
1745     case DataType::Type::kInt8:
1746       __ Ldrsb(Register(dst), src);
1747       break;
1748     case DataType::Type::kUint16:
1749       __ Ldrh(Register(dst), src);
1750       break;
1751     case DataType::Type::kInt16:
1752       __ Ldrsh(Register(dst), src);
1753       break;
1754     case DataType::Type::kInt32:
1755     case DataType::Type::kReference:
1756     case DataType::Type::kInt64:
1757     case DataType::Type::kFloat32:
1758     case DataType::Type::kFloat64:
1759       DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1760       __ Ldr(dst, src);
1761       break;
1762     case DataType::Type::kUint32:
1763     case DataType::Type::kUint64:
1764     case DataType::Type::kVoid:
1765       LOG(FATAL) << "Unreachable type " << type;
1766   }
1767 }
1768 
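// Emits a load-acquire (LDARB/LDARH/LDAR) of `type` from `src` into `dst`. The address is first
// materialized into a temp since these instructions take no offset, sub-word signed types are
// sign-extended afterwards with SBFX, and FP loads go through a core temp plus FMOV because
// there is no FP load-acquire instruction.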
1769 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
1770                                      DataType::Type type,
1771                                      CPURegister dst,
1772                                      const MemOperand& src,
1773                                      bool needs_null_check) {
1774   MacroAssembler* masm = GetVIXLAssembler();
1775   UseScratchRegisterScope temps(masm);
1776   Register temp_base = temps.AcquireX();
1777 
1778   DCHECK(!src.IsPreIndex());
1779   DCHECK(!src.IsPostIndex());
1780 
1781   // TODO(vixl): Let the MacroAssembler handle MemOperand.
1782   __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
1783   {
1784     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
1785     MemOperand base = MemOperand(temp_base);
1786     switch (type) {
1787       case DataType::Type::kBool:
1788       case DataType::Type::kUint8:
1789       case DataType::Type::kInt8:
1790         {
1791           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1792           __ ldarb(Register(dst), base);
1793           if (needs_null_check) {
1794             MaybeRecordImplicitNullCheck(instruction);
1795           }
1796         }
1797         if (type == DataType::Type::kInt8) {
1798           __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1799         }
1800         break;
1801       case DataType::Type::kUint16:
1802       case DataType::Type::kInt16:
1803         {
1804           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1805           __ ldarh(Register(dst), base);
1806           if (needs_null_check) {
1807             MaybeRecordImplicitNullCheck(instruction);
1808           }
1809         }
1810         if (type == DataType::Type::kInt16) {
1811           __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1812         }
1813         break;
1814       case DataType::Type::kInt32:
1815       case DataType::Type::kReference:
1816       case DataType::Type::kInt64:
1817         DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1818         {
1819           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1820           __ ldar(Register(dst), base);
1821           if (needs_null_check) {
1822             MaybeRecordImplicitNullCheck(instruction);
1823           }
1824         }
1825         break;
1826       case DataType::Type::kFloat32:
1827       case DataType::Type::kFloat64: {
1828         DCHECK(dst.IsFPRegister());
1829         DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1830 
1831         Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
1832         {
1833           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1834           __ ldar(temp, base);
1835           if (needs_null_check) {
1836             MaybeRecordImplicitNullCheck(instruction);
1837           }
1838         }
1839         __ Fmov(VRegister(dst), temp);
1840         break;
1841       }
1842       case DataType::Type::kUint32:
1843       case DataType::Type::kUint64:
1844       case DataType::Type::kVoid:
1845         LOG(FATAL) << "Unreachable type " << type;
1846     }
1847   }
1848 }
1849 
1850 void CodeGeneratorARM64::Store(DataType::Type type,
1851                                CPURegister src,
1852                                const MemOperand& dst) {
1853   switch (type) {
1854     case DataType::Type::kBool:
1855     case DataType::Type::kUint8:
1856     case DataType::Type::kInt8:
1857       __ Strb(Register(src), dst);
1858       break;
1859     case DataType::Type::kUint16:
1860     case DataType::Type::kInt16:
1861       __ Strh(Register(src), dst);
1862       break;
1863     case DataType::Type::kInt32:
1864     case DataType::Type::kReference:
1865     case DataType::Type::kInt64:
1866     case DataType::Type::kFloat32:
1867     case DataType::Type::kFloat64:
1868       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1869       __ Str(src, dst);
1870       break;
1871     case DataType::Type::kUint32:
1872     case DataType::Type::kUint64:
1873     case DataType::Type::kVoid:
1874       LOG(FATAL) << "Unreachable type " << type;
1875   }
1876 }
1877 
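// Emits a store-release (STLRB/STLRH/STLR) of `src` with the given `type` to `dst`. FP values
// are first moved to a core temp with FMOV (or the zero register is used directly for a zero
// bit pattern), mirroring LoadAcquire() above.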
1878 void CodeGeneratorARM64::StoreRelease(HInstruction* instruction,
1879                                       DataType::Type type,
1880                                       CPURegister src,
1881                                       const MemOperand& dst,
1882                                       bool needs_null_check) {
1883   MacroAssembler* masm = GetVIXLAssembler();
1884   UseScratchRegisterScope temps(GetVIXLAssembler());
1885   Register temp_base = temps.AcquireX();
1886 
1887   DCHECK(!dst.IsPreIndex());
1888   DCHECK(!dst.IsPostIndex());
1889 
1890   // TODO(vixl): Let the MacroAssembler handle this.
1891   Operand op = OperandFromMemOperand(dst);
1892   __ Add(temp_base, dst.GetBaseRegister(), op);
1893   MemOperand base = MemOperand(temp_base);
1894   // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
1895   switch (type) {
1896     case DataType::Type::kBool:
1897     case DataType::Type::kUint8:
1898     case DataType::Type::kInt8:
1899       {
1900         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1901         __ stlrb(Register(src), base);
1902         if (needs_null_check) {
1903           MaybeRecordImplicitNullCheck(instruction);
1904         }
1905       }
1906       break;
1907     case DataType::Type::kUint16:
1908     case DataType::Type::kInt16:
1909       {
1910         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1911         __ stlrh(Register(src), base);
1912         if (needs_null_check) {
1913           MaybeRecordImplicitNullCheck(instruction);
1914         }
1915       }
1916       break;
1917     case DataType::Type::kInt32:
1918     case DataType::Type::kReference:
1919     case DataType::Type::kInt64:
1920       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1921       {
1922         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1923         __ stlr(Register(src), base);
1924         if (needs_null_check) {
1925           MaybeRecordImplicitNullCheck(instruction);
1926         }
1927       }
1928       break;
1929     case DataType::Type::kFloat32:
1930     case DataType::Type::kFloat64: {
1931       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1932       Register temp_src;
1933       if (src.IsZero()) {
1934         // The zero register is used to avoid synthesizing zero constants.
1935         temp_src = Register(src);
1936       } else {
1937         DCHECK(src.IsFPRegister());
1938         temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
1939         __ Fmov(temp_src, VRegister(src));
1940       }
1941       {
1942         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1943         __ stlr(temp_src, base);
1944         if (needs_null_check) {
1945           MaybeRecordImplicitNullCheck(instruction);
1946         }
1947       }
1948       break;
1949     }
1950     case DataType::Type::kUint32:
1951     case DataType::Type::kUint64:
1952     case DataType::Type::kVoid:
1953       LOG(FATAL) << "Unreachable type " << type;
1954   }
1955 }
1956 
1957 void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1958                                        HInstruction* instruction,
1959                                        uint32_t dex_pc,
1960                                        SlowPathCode* slow_path) {
1961   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1962 
1963   ThreadOffset64 entrypoint_offset = GetThreadOffset<kArm64PointerSize>(entrypoint);
1964   // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the
1965   // entire oat file. This adds an extra branch and we do not want to slow down the main path.
1966   // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative.
1967   if (slow_path == nullptr || GetCompilerOptions().IsJitCompiler()) {
1968     __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
1969     // Ensure the pc position is recorded immediately after the `blr` instruction.
1970     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
1971     __ blr(lr);
1972     if (EntrypointRequiresStackMap(entrypoint)) {
1973       RecordPcInfo(instruction, dex_pc, slow_path);
1974     }
1975   } else {
1976     // Ensure the pc position is recorded immediately after the `bl` instruction.
1977     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
1978     EmitEntrypointThunkCall(entrypoint_offset);
1979     if (EntrypointRequiresStackMap(entrypoint)) {
1980       RecordPcInfo(instruction, dex_pc, slow_path);
1981     }
1982   }
1983 }
1984 
1985 void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1986                                                              HInstruction* instruction,
1987                                                              SlowPathCode* slow_path) {
1988   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1989   __ Ldr(lr, MemOperand(tr, entry_point_offset));
1990   __ Blr(lr);
1991 }
1992 
1993 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
1994                                                                      Register class_reg) {
1995   UseScratchRegisterScope temps(GetVIXLAssembler());
1996   Register temp = temps.AcquireW();
1997 
1998   // CMP (immediate) is limited to imm12 or imm12<<12, so we would need to materialize
1999   // the constant 0xf0000000 for comparison with the full 32-bit field. To reduce the code
2000   // size, load only the high byte of the field and compare with 0xf0.
2001   // Note: The same code size could be achieved with LDR+MVN(asr #24)+CBNZ but benchmarks
2002   // show that this pattern is slower (tested on little cores).
2003   __ Ldrb(temp, HeapOperand(class_reg, status_byte_offset));
2004   __ Cmp(temp, shifted_visibly_initialized_value);
2005   __ B(lo, slow_path->GetEntryLabel());
2006   __ Bind(slow_path->GetExitLabel());
2007 }
2008 
2009 void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare(
2010     HTypeCheckInstruction* check, vixl::aarch64::Register temp) {
2011   uint32_t path_to_root = check->GetBitstringPathToRoot();
2012   uint32_t mask = check->GetBitstringMask();
2013   DCHECK(IsPowerOfTwo(mask + 1));
2014   size_t mask_bits = WhichPowerOf2(mask + 1);
2015 
2016   if (mask_bits == 16u) {
2017     // Load only the bitstring part of the status word.
2018     __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
2019   } else {
2020     // /* uint32_t */ temp = temp->status_
2021     __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
2022     // Extract the bitstring bits.
2023     __ Ubfx(temp, temp, 0, mask_bits);
2024   }
2025   // Compare the bitstring bits to `path_to_root`.
2026   __ Cmp(temp, path_to_root);
2027 }
2028 
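// Lowers the abstract barrier kinds to DMB with inner-shareable scope: kAnyAny and kAnyStore
// use a full barrier, kLoadAny a read barrier and kStoreStore a write barrier.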
2029 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
2030   BarrierType type = BarrierAll;
2031 
2032   switch (kind) {
2033     case MemBarrierKind::kAnyAny:
2034     case MemBarrierKind::kAnyStore: {
2035       type = BarrierAll;
2036       break;
2037     }
2038     case MemBarrierKind::kLoadAny: {
2039       type = BarrierReads;
2040       break;
2041     }
2042     case MemBarrierKind::kStoreStore: {
2043       type = BarrierWrites;
2044       break;
2045     }
2046     default:
2047       LOG(FATAL) << "Unexpected memory barrier " << kind;
2048   }
2049   __ Dmb(InnerShareable, type);
2050 }
2051 
2052 bool CodeGeneratorARM64::CanUseImplicitSuspendCheck() const {
2053   // Use implicit suspend checks if requested in compiler options unless there are SIMD
2054   // instructions in the graph. The implicit suspend check saves all FP registers as
2055   // 64-bit (in line with the calling convention) but SIMD instructions can use 128-bit
2056   // registers, so they need to be saved in an explicit slow path.
2057   return GetCompilerOptions().GetImplicitSuspendChecks() && !GetGraph()->HasSIMD();
2058 }
2059 
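// Emits the suspend check: either an implicit check (a load through kImplicitSuspendCheckRegister
// that the runtime can turn into a fault when suspension is requested) or an explicit test of the
// thread flags followed by a branch to SuspendCheckSlowPathARM64.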
2060 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
2061                                                          HBasicBlock* successor) {
2062   if (instruction->IsNoOp()) {
2063     if (successor != nullptr) {
2064       __ B(codegen_->GetLabelOf(successor));
2065     }
2066     return;
2067   }
2068 
2069   if (codegen_->CanUseImplicitSuspendCheck()) {
2070     __ Ldr(kImplicitSuspendCheckRegister, MemOperand(kImplicitSuspendCheckRegister));
2071     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
2072     if (successor != nullptr) {
2073       __ B(codegen_->GetLabelOf(successor));
2074     }
2075     return;
2076   }
2077 
2078   SuspendCheckSlowPathARM64* slow_path =
2079       down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
2080   if (slow_path == nullptr) {
2081     slow_path =
2082         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARM64(instruction, successor);
2083     instruction->SetSlowPath(slow_path);
2084     codegen_->AddSlowPath(slow_path);
2085     if (successor != nullptr) {
2086       DCHECK(successor->IsLoopHeader());
2087     }
2088   } else {
2089     DCHECK_EQ(slow_path->GetSuccessor(), successor);
2090   }
2091 
2092   UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
2093   Register temp = temps.AcquireW();
2094 
2095   __ Ldr(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
2096   __ Tst(temp, Thread::SuspendOrCheckpointRequestFlags());
2097   if (successor == nullptr) {
2098     __ B(ne, slow_path->GetEntryLabel());
2099     __ Bind(slow_path->GetReturnLabel());
2100   } else {
2101     __ B(eq, codegen_->GetLabelOf(successor));
2102     __ B(slow_path->GetEntryLabel());
2103     // slow_path will return to GetLabelOf(successor).
2104   }
2105 }
2106 
2107 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
2108                                                              CodeGeneratorARM64* codegen)
2109       : InstructionCodeGenerator(graph, codegen),
2110         assembler_(codegen->GetAssembler()),
2111         codegen_(codegen) {}
2112 
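// Common location setup for two-input arithmetic/logic operations: integral operations take a
// register plus an ARM64-encodable constant or register, floating-point operations take two FP
// registers; the output may reuse an input register (kNoOutputOverlap).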
2113 void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
2114   DCHECK_EQ(instr->InputCount(), 2U);
2115   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2116   DataType::Type type = instr->GetResultType();
2117   switch (type) {
2118     case DataType::Type::kInt32:
2119     case DataType::Type::kInt64:
2120       locations->SetInAt(0, Location::RequiresRegister());
2121       locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
2122       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2123       break;
2124 
2125     case DataType::Type::kFloat32:
2126     case DataType::Type::kFloat64:
2127       locations->SetInAt(0, Location::RequiresFpuRegister());
2128       locations->SetInAt(1, Location::RequiresFpuRegister());
2129       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2130       break;
2131 
2132     default:
2133       LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type;
2134   }
2135 }
2136 
2137 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
2138                                            const FieldInfo& field_info) {
2139   DCHECK(instruction->IsInstanceFieldGet() ||
2140          instruction->IsStaticFieldGet() ||
2141          instruction->IsPredicatedInstanceFieldGet());
2142 
2143   bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
2144 
2145   bool object_field_get_with_read_barrier =
2146       gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
2147   LocationSummary* locations =
2148       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
2149                                                        object_field_get_with_read_barrier
2150                                                            ? LocationSummary::kCallOnSlowPath
2151                                                            : LocationSummary::kNoCall);
2152   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
2153     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2154     // We need a temporary register for the read barrier load in
2155     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
2156     // only if the field is volatile or the offset is too big.
2157     if (field_info.IsVolatile() ||
2158         field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
2159       locations->AddTemp(FixedTempLocation());
2160     }
2161   }
2162   // Input for object receiver.
2163   locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
2164   if (DataType::IsFloatingPointType(instruction->GetType())) {
2165     if (is_predicated) {
2166       locations->SetInAt(0, Location::RequiresFpuRegister());
2167       locations->SetOut(Location::SameAsFirstInput());
2168     } else {
2169       locations->SetOut(Location::RequiresFpuRegister());
2170     }
2171   } else {
2172     if (is_predicated) {
2173       locations->SetInAt(0, Location::RequiresRegister());
2174       locations->SetOut(Location::SameAsFirstInput());
2175     } else {
2176       // The output overlaps for an object field get when read barriers
2177       // are enabled: we do not want the load to overwrite the object's
2178       // location, as we need it to emit the read barrier.
2179       locations->SetOut(Location::RequiresRegister(),
2180                         object_field_get_with_read_barrier ? Location::kOutputOverlap
2181                                                            : Location::kNoOutputOverlap);
2182     }
2183   }
2184 }
2185 
2186 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
2187                                                    const FieldInfo& field_info) {
2188   DCHECK(instruction->IsInstanceFieldGet() ||
2189          instruction->IsStaticFieldGet() ||
2190          instruction->IsPredicatedInstanceFieldGet());
2191   bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
2192   LocationSummary* locations = instruction->GetLocations();
2193   uint32_t receiver_input = is_predicated ? 1 : 0;
2194   Location base_loc = locations->InAt(receiver_input);
2195   Location out = locations->Out();
2196   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
2197   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
2198   DataType::Type load_type = instruction->GetType();
2199   MemOperand field =
2200       HeapOperand(InputRegisterAt(instruction, receiver_input), field_info.GetFieldOffset());
2201 
2202   if (gUseReadBarrier && kUseBakerReadBarrier &&
2203       load_type == DataType::Type::kReference) {
2204     // Object FieldGet with Baker's read barrier case.
2205     // /* HeapReference<Object> */ out = *(base + offset)
2206     Register base = RegisterFrom(base_loc, DataType::Type::kReference);
2207     Location maybe_temp =
2208         (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2209     // Note that potential implicit null checks are handled in this
2210     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
2211     codegen_->GenerateFieldLoadWithBakerReadBarrier(
2212         instruction,
2213         out,
2214         base,
2215         offset,
2216         maybe_temp,
2217         /* needs_null_check= */ true,
2218         field_info.IsVolatile());
2219   } else {
2220     // General case.
2221     if (field_info.IsVolatile()) {
2222       // Note that a potential implicit null check is handled in this
2223       // CodeGeneratorARM64::LoadAcquire call.
2224       // NB: LoadAcquire will record the pc info if needed.
2225       codegen_->LoadAcquire(instruction,
2226                             load_type,
2227                             OutputCPURegister(instruction),
2228                             field,
2229                             /* needs_null_check= */ true);
2230     } else {
2231       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2232       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2233       codegen_->Load(load_type, OutputCPURegister(instruction), field);
2234       codegen_->MaybeRecordImplicitNullCheck(instruction);
2235     }
2236     if (load_type == DataType::Type::kReference) {
2237       // If read barriers are enabled, emit read barriers other than
2238       // Baker's using a slow path (and also unpoison the loaded
2239       // reference, if heap poisoning is enabled).
2240       codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
2241     }
2242   }
2243 }
2244 
2245 void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
2246   LocationSummary* locations =
2247       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2248   locations->SetInAt(0, Location::RequiresRegister());
2249   HInstruction* value = instruction->InputAt(1);
2250   if (IsZeroBitPattern(value)) {
2251     locations->SetInAt(1, Location::ConstantLocation(value));
2252   } else if (DataType::IsFloatingPointType(value->GetType())) {
2253     locations->SetInAt(1, Location::RequiresFpuRegister());
2254   } else {
2255     locations->SetInAt(1, Location::RequiresRegister());
2256   }
2257 }
2258 
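// Common code for instance/static field stores: skips the store for a predicated set with a null
// receiver, poisons reference values when heap poisoning is enabled, uses a store-release for
// volatile fields, and marks the GC card afterwards when a write barrier is required.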
2259 void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
2260                                                    const FieldInfo& field_info,
2261                                                    bool value_can_be_null,
2262                                                    WriteBarrierKind write_barrier_kind) {
2263   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
2264   bool is_predicated =
2265       instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
2266 
2267   Register obj = InputRegisterAt(instruction, 0);
2268   CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
2269   CPURegister source = value;
2270   Offset offset = field_info.GetFieldOffset();
2271   DataType::Type field_type = field_info.GetFieldType();
2272   std::optional<vixl::aarch64::Label> pred_is_null;
2273   if (is_predicated) {
2274     pred_is_null.emplace();
2275     __ Cbz(obj, &*pred_is_null);
2276   }
2277 
2278   {
2279     // We use a block to end the scratch scope before the write barrier, thus
2280     // freeing the temporary registers so they can be used in `MarkGCCard`.
2281     UseScratchRegisterScope temps(GetVIXLAssembler());
2282 
2283     if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
2284       DCHECK(value.IsW());
2285       Register temp = temps.AcquireW();
2286       __ Mov(temp, value.W());
2287       GetAssembler()->PoisonHeapReference(temp.W());
2288       source = temp;
2289     }
2290 
2291     if (field_info.IsVolatile()) {
2292       codegen_->StoreRelease(
2293           instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check= */ true);
2294     } else {
2295       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2296       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2297       codegen_->Store(field_type, source, HeapOperand(obj, offset));
2298       codegen_->MaybeRecordImplicitNullCheck(instruction);
2299     }
2300   }
2301 
2302   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)) &&
2303       write_barrier_kind != WriteBarrierKind::kDontEmit) {
2304     codegen_->MarkGCCard(
2305         obj,
2306         Register(value),
2307         value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck);
2308   }
2309 
2310   if (is_predicated) {
2311     __ Bind(&*pred_is_null);
2312   }
2313 }
2314 
2315 void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
2316   DataType::Type type = instr->GetType();
2317 
2318   switch (type) {
2319     case DataType::Type::kInt32:
2320     case DataType::Type::kInt64: {
2321       Register dst = OutputRegister(instr);
2322       Register lhs = InputRegisterAt(instr, 0);
2323       Operand rhs = InputOperandAt(instr, 1);
2324       if (instr->IsAdd()) {
2325         __ Add(dst, lhs, rhs);
2326       } else if (instr->IsAnd()) {
2327         __ And(dst, lhs, rhs);
2328       } else if (instr->IsOr()) {
2329         __ Orr(dst, lhs, rhs);
2330       } else if (instr->IsSub()) {
2331         __ Sub(dst, lhs, rhs);
2332       } else if (instr->IsRor()) {
2333         if (rhs.IsImmediate()) {
2334           uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
2335           __ Ror(dst, lhs, shift);
2336         } else {
2337           // Ensure the shift distance is in a register of the same size as the result. If
2338           // we are rotating a long and the shift originally comes in a W register, we
2339           // don't need to sxtw it for use as an X, since every shift distance is
2340           // implicitly masked with (reg_bits - 1).
2341           __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
2342         }
2343       } else if (instr->IsMin() || instr->IsMax()) {
2344         __ Cmp(lhs, rhs);
2345         __ Csel(dst, lhs, rhs, instr->IsMin() ? lt : gt);
2346       } else {
2347         DCHECK(instr->IsXor());
2348         __ Eor(dst, lhs, rhs);
2349       }
2350       break;
2351     }
2352     case DataType::Type::kFloat32:
2353     case DataType::Type::kFloat64: {
2354       VRegister dst = OutputFPRegister(instr);
2355       VRegister lhs = InputFPRegisterAt(instr, 0);
2356       VRegister rhs = InputFPRegisterAt(instr, 1);
2357       if (instr->IsAdd()) {
2358         __ Fadd(dst, lhs, rhs);
2359       } else if (instr->IsSub()) {
2360         __ Fsub(dst, lhs, rhs);
2361       } else if (instr->IsMin()) {
2362         __ Fmin(dst, lhs, rhs);
2363       } else if (instr->IsMax()) {
2364         __ Fmax(dst, lhs, rhs);
2365       } else {
2366         LOG(FATAL) << "Unexpected floating-point binary operation";
2367       }
2368       break;
2369     }
2370     default:
2371       LOG(FATAL) << "Unexpected binary operation type " << type;
2372   }
2373 }
2374 
2375 void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
2376   DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2377 
2378   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2379   DataType::Type type = instr->GetResultType();
2380   switch (type) {
2381     case DataType::Type::kInt32:
2382     case DataType::Type::kInt64: {
2383       locations->SetInAt(0, Location::RequiresRegister());
2384       locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
2385       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2386       break;
2387     }
2388     default:
2389       LOG(FATAL) << "Unexpected shift type " << type;
2390   }
2391 }
2392 
2393 void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
2394   DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2395 
2396   DataType::Type type = instr->GetType();
2397   switch (type) {
2398     case DataType::Type::kInt32:
2399     case DataType::Type::kInt64: {
2400       Register dst = OutputRegister(instr);
2401       Register lhs = InputRegisterAt(instr, 0);
2402       Operand rhs = InputOperandAt(instr, 1);
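           // The shift distance is masked below with kMaxIntShiftDistance (0x1f) or
           // kMaxLongShiftDistance (0x3f), matching Java's shift semantics and the AArch64
           // register-shift behavior.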
2403       if (rhs.IsImmediate()) {
2404         uint32_t shift_value = rhs.GetImmediate() &
2405             (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance);
2406         if (instr->IsShl()) {
2407           __ Lsl(dst, lhs, shift_value);
2408         } else if (instr->IsShr()) {
2409           __ Asr(dst, lhs, shift_value);
2410         } else {
2411           __ Lsr(dst, lhs, shift_value);
2412         }
2413       } else {
2414         Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();
2415 
2416         if (instr->IsShl()) {
2417           __ Lsl(dst, lhs, rhs_reg);
2418         } else if (instr->IsShr()) {
2419           __ Asr(dst, lhs, rhs_reg);
2420         } else {
2421           __ Lsr(dst, lhs, rhs_reg);
2422         }
2423       }
2424       break;
2425     }
2426     default:
2427       LOG(FATAL) << "Unexpected shift operation type " << type;
2428   }
2429 }
2430 
2431 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
2432   HandleBinaryOp(instruction);
2433 }
2434 
2435 void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) {
2436   HandleBinaryOp(instruction);
2437 }
2438 
2439 void LocationsBuilderARM64::VisitAnd(HAnd* instruction) {
2440   HandleBinaryOp(instruction);
2441 }
2442 
2443 void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) {
2444   HandleBinaryOp(instruction);
2445 }
2446 
2447 void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2448   DCHECK(DataType::IsIntegralType(instr->GetType())) << instr->GetType();
2449   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2450   locations->SetInAt(0, Location::RequiresRegister());
2451   // There is no immediate variant of negated bitwise instructions in AArch64.
2452   locations->SetInAt(1, Location::RequiresRegister());
2453   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2454 }
2455 
2456 void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2457   Register dst = OutputRegister(instr);
2458   Register lhs = InputRegisterAt(instr, 0);
2459   Register rhs = InputRegisterAt(instr, 1);
2460 
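       // AArch64 semantics of the negated-right forms: BIC dst = lhs & ~rhs,
       // ORN dst = lhs | ~rhs, EON dst = lhs ^ ~rhs.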
2461   switch (instr->GetOpKind()) {
2462     case HInstruction::kAnd:
2463       __ Bic(dst, lhs, rhs);
2464       break;
2465     case HInstruction::kOr:
2466       __ Orn(dst, lhs, rhs);
2467       break;
2468     case HInstruction::kXor:
2469       __ Eon(dst, lhs, rhs);
2470       break;
2471     default:
2472       LOG(FATAL) << "Unreachable";
2473   }
2474 }
2475 
2476 void LocationsBuilderARM64::VisitDataProcWithShifterOp(
2477     HDataProcWithShifterOp* instruction) {
2478   DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
2479          instruction->GetType() == DataType::Type::kInt64);
2480   LocationSummary* locations =
2481       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2482   if (instruction->GetInstrKind() == HInstruction::kNeg) {
2483     locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)));
2484   } else {
2485     locations->SetInAt(0, Location::RequiresRegister());
2486   }
2487   locations->SetInAt(1, Location::RequiresRegister());
2488   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2489 }
2490 
2491 void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp(
2492     HDataProcWithShifterOp* instruction) {
2493   DataType::Type type = instruction->GetType();
2494   HInstruction::InstructionKind kind = instruction->GetInstrKind();
2495   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
2496   Register out = OutputRegister(instruction);
2497   Register left;
2498   if (kind != HInstruction::kNeg) {
2499     left = InputRegisterAt(instruction, 0);
2500   }
2501   // If this `HDataProcWithShifterOp` was created by merging a type conversion as the
2502   // shifter operand operation, the IR generating `right_reg` (input to the type
2503   // conversion) can have a different type from the current instruction's type,
2504   // so we manually indicate the type.
2505   Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
2506   Operand right_operand(0);
2507 
2508   HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
2509   if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
2510     right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
2511   } else {
2512     right_operand = Operand(right_reg,
2513                             helpers::ShiftFromOpKind(op_kind),
2514                             instruction->GetShiftAmount());
2515   }
2516 
2517   // Logical binary operations do not support extension operations in the
2518   // operand. Note that VIXL would still manage if one were passed, by generating
2519   // the extension as a separate instruction.
2520   // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
2521   DCHECK_IMPLIES(right_operand.IsExtendedRegister(),
2522                  kind != HInstruction::kAnd && kind != HInstruction::kOr &&
2523                      kind != HInstruction::kXor && kind != HInstruction::kNeg);
2524   switch (kind) {
2525     case HInstruction::kAdd:
2526       __ Add(out, left, right_operand);
2527       break;
2528     case HInstruction::kAnd:
2529       __ And(out, left, right_operand);
2530       break;
2531     case HInstruction::kNeg:
2532       DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero());
2533       __ Neg(out, right_operand);
2534       break;
2535     case HInstruction::kOr:
2536       __ Orr(out, left, right_operand);
2537       break;
2538     case HInstruction::kSub:
2539       __ Sub(out, left, right_operand);
2540       break;
2541     case HInstruction::kXor:
2542       __ Eor(out, left, right_operand);
2543       break;
2544     default:
2545       LOG(FATAL) << "Unexpected operation kind: " << kind;
2546       UNREACHABLE();
2547   }
2548 }
2549 
2550 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2551   LocationSummary* locations =
2552       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2553   locations->SetInAt(0, Location::RequiresRegister());
2554   locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
2555   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2556 }
2557 
2558 void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2559   __ Add(OutputRegister(instruction),
2560          InputRegisterAt(instruction, 0),
2561          Operand(InputOperandAt(instruction, 1)));
2562 }
2563 
2564 void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
2565   LocationSummary* locations =
2566       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2567 
2568   HIntConstant* shift = instruction->GetShift()->AsIntConstant();
2569 
2570   locations->SetInAt(0, Location::RequiresRegister());
2571   // For the byte case we don't need to shift the index variable, so we can encode the data
2572   // offset into the ADD instruction. For other cases we prefer the data_offset to be in a
2573   // register; that hoists the data offset constant generation out of the loop and reduces the
2574   // critical path length in the loop.
2575   locations->SetInAt(1, shift->GetValue() == 0
2576                         ? Location::ConstantLocation(instruction->GetOffset())
2577                         : Location::RequiresRegister());
2578   locations->SetInAt(2, Location::ConstantLocation(shift));
2579   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2580 }
2581 
2582 void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex(
2583     HIntermediateAddressIndex* instruction) {
2584   Register index_reg = InputRegisterAt(instruction, 0);
2585   uint32_t shift = Int64FromLocation(instruction->GetLocations()->InAt(2));
2586   uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue();
2587 
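       // Illustratively: for a byte array (shift == 0) this emits 'add out, index, #data_offset';
       // for, e.g., an int array (shift == 2) it emits 'add out, offset, index, lsl #2'.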
2588   if (shift == 0) {
2589     __ Add(OutputRegister(instruction), index_reg, offset);
2590   } else {
2591     Register offset_reg = InputRegisterAt(instruction, 1);
2592     __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift));
2593   }
2594 }
2595 
2596 void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2597   LocationSummary* locations =
2598       new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
2599   HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2600   if (instr->GetOpKind() == HInstruction::kSub &&
2601       accumulator->IsConstant() &&
2602       accumulator->AsConstant()->IsArithmeticZero()) {
2603     // Don't allocate a register for the Mneg instruction.
2604   } else {
2605     locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
2606                        Location::RequiresRegister());
2607   }
2608   locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
2609   locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
2610   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2611 }
2612 
2613 void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2614   Register res = OutputRegister(instr);
2615   Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
2616   Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
2617 
2618   // Avoid emitting code that could trigger Cortex A53's erratum 835769.
2619   // This fixup should be carried out for all multiply-accumulate instructions:
2620   // madd, msub, smaddl, smsubl, umaddl and umsubl.
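       // A sketch of the sequence the erratum describes: a 64-bit load or store (e.g.
       // 'ldr x1, [x2]') immediately followed by a 64-bit multiply-accumulate (e.g.
       // 'madd x0, x3, x4, x5') may produce a wrong result on affected cores, so a nop
       // is inserted between them.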
2621   if (instr->GetType() == DataType::Type::kInt64 &&
2622       codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
2623     MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
2624     ptrdiff_t off = masm->GetCursorOffset();
2625     if (off >= static_cast<ptrdiff_t>(kInstructionSize) &&
2626         masm->GetInstructionAt(off - static_cast<ptrdiff_t>(kInstructionSize))->IsLoadOrStore()) {
2627       // Make sure we emit exactly one nop.
2628       ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2629       __ nop();
2630     }
2631   }
2632 
2633   if (instr->GetOpKind() == HInstruction::kAdd) {
2634     Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2635     __ Madd(res, mul_left, mul_right, accumulator);
2636   } else {
2637     DCHECK(instr->GetOpKind() == HInstruction::kSub);
2638     HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2639     if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) {
2640       __ Mneg(res, mul_left, mul_right);
2641     } else {
2642       Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2643       __ Msub(res, mul_left, mul_right, accumulator);
2644     }
2645   }
2646 }
2647 
2648 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
2649   bool object_array_get_with_read_barrier =
2650       gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
2651   LocationSummary* locations =
2652       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
2653                                                        object_array_get_with_read_barrier
2654                                                            ? LocationSummary::kCallOnSlowPath
2655                                                            : LocationSummary::kNoCall);
2656   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
2657     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2658     if (instruction->GetIndex()->IsConstant()) {
2659       // Array loads with constant index are treated as field loads.
2660       // We need a temporary register for the read barrier load in
2661       // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
2662       // only if the offset is too big.
2663       uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2664       uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
2665       offset += index << DataType::SizeShift(DataType::Type::kReference);
2666       if (offset >= kReferenceLoadMinFarOffset) {
2667         locations->AddTemp(FixedTempLocation());
2668       }
2669     } else if (!instruction->GetArray()->IsIntermediateAddress()) {
2670       // We need a non-scratch temporary for the array data pointer in
2671       // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier() for the case with no
2672       // intermediate address.
2673       locations->AddTemp(Location::RequiresRegister());
2674     }
2675   }
2676   locations->SetInAt(0, Location::RequiresRegister());
2677   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2678   if (DataType::IsFloatingPointType(instruction->GetType())) {
2679     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2680   } else {
2681     // The output overlaps in the case of an object array get with
2682     // read barriers enabled: we do not want the move to overwrite the
2683     // array's location, as we need it to emit the read barrier.
2684     locations->SetOut(
2685         Location::RequiresRegister(),
2686         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2687   }
2688 }
2689 
2690 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
2691   DataType::Type type = instruction->GetType();
2692   Register obj = InputRegisterAt(instruction, 0);
2693   LocationSummary* locations = instruction->GetLocations();
2694   Location index = locations->InAt(1);
2695   Location out = locations->Out();
2696   uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2697   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
2698                                         instruction->IsStringCharAt();
2699   MacroAssembler* masm = GetVIXLAssembler();
2700   UseScratchRegisterScope temps(masm);
2701 
2702   // The non-Baker read barrier instrumentation of object ArrayGet instructions
2703   // does not support the HIntermediateAddress instruction.
2704   DCHECK(!((type == DataType::Type::kReference) &&
2705            instruction->GetArray()->IsIntermediateAddress() &&
2706            gUseReadBarrier &&
2707            !kUseBakerReadBarrier));
2708 
2709   if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) {
2710     // Object ArrayGet with Baker's read barrier case.
2711     // Note that a potential implicit null check is handled in the
2712     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
2713     DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
2714     if (index.IsConstant()) {
2715       DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2716       // Array load with a constant index can be treated as a field load.
2717       offset += Int64FromLocation(index) << DataType::SizeShift(type);
2718       Location maybe_temp =
2719           (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2720       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
2721                                                       out,
2722                                                       obj.W(),
2723                                                       offset,
2724                                                       maybe_temp,
2725                                                       /* needs_null_check= */ false,
2726                                                       /* use_load_acquire= */ false);
2727     } else {
2728       codegen_->GenerateArrayLoadWithBakerReadBarrier(
2729           instruction, out, obj.W(), offset, index, /* needs_null_check= */ false);
2730     }
2731   } else {
2732     // General case.
2733     MemOperand source = HeapOperand(obj);
2734     Register length;
2735     if (maybe_compressed_char_at) {
2736       uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2737       length = temps.AcquireW();
2738       {
2739         // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2740         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2741 
2742         if (instruction->GetArray()->IsIntermediateAddress()) {
2743           DCHECK_LT(count_offset, offset);
2744           int64_t adjusted_offset =
2745               static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
2746           // Note that `adjusted_offset` is negative, so this will be a LDUR.
2747           __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
2748         } else {
2749           __ Ldr(length, HeapOperand(obj, count_offset));
2750         }
2751         codegen_->MaybeRecordImplicitNullCheck(instruction);
2752       }
2753     }
2754     if (index.IsConstant()) {
2755       if (maybe_compressed_char_at) {
2756         vixl::aarch64::Label uncompressed_load, done;
2757         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2758                       "Expecting 0=compressed, 1=uncompressed");
2759         __ Tbnz(length.W(), 0, &uncompressed_load);
2760         __ Ldrb(Register(OutputCPURegister(instruction)),
2761                 HeapOperand(obj, offset + Int64FromLocation(index)));
2762         __ B(&done);
2763         __ Bind(&uncompressed_load);
2764         __ Ldrh(Register(OutputCPURegister(instruction)),
2765                 HeapOperand(obj, offset + (Int64FromLocation(index) << 1)));
2766         __ Bind(&done);
2767       } else {
2768         offset += Int64FromLocation(index) << DataType::SizeShift(type);
2769         source = HeapOperand(obj, offset);
2770       }
2771     } else {
2772       Register temp = temps.AcquireSameSizeAs(obj);
2773       if (instruction->GetArray()->IsIntermediateAddress()) {
2774         // We do not need to compute the intermediate address from the array: the
2775         // input instruction has done it already. See the comment in
2776         // `TryExtractArrayAccessAddress()`.
2777         if (kIsDebugBuild) {
2778           HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
2779           DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
2780         }
2781         temp = obj;
2782       } else {
2783         __ Add(temp, obj, offset);
2784       }
2785       if (maybe_compressed_char_at) {
2786         vixl::aarch64::Label uncompressed_load, done;
2787         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2788                       "Expecting 0=compressed, 1=uncompressed");
2789         __ Tbnz(length.W(), 0, &uncompressed_load);
2790         __ Ldrb(Register(OutputCPURegister(instruction)),
2791                 HeapOperand(temp, XRegisterFrom(index), LSL, 0));
2792         __ B(&done);
2793         __ Bind(&uncompressed_load);
2794         __ Ldrh(Register(OutputCPURegister(instruction)),
2795                 HeapOperand(temp, XRegisterFrom(index), LSL, 1));
2796         __ Bind(&done);
2797       } else {
2798         source = HeapOperand(temp, XRegisterFrom(index), LSL, DataType::SizeShift(type));
2799       }
2800     }
2801     if (!maybe_compressed_char_at) {
2802       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2803       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2804       codegen_->Load(type, OutputCPURegister(instruction), source);
2805       codegen_->MaybeRecordImplicitNullCheck(instruction);
2806     }
2807 
2808     if (type == DataType::Type::kReference) {
2809       static_assert(
2810           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
2811           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
2812       Location obj_loc = locations->InAt(0);
2813       if (index.IsConstant()) {
2814         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
2815       } else {
2816         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
2817       }
2818     }
2819   }
2820 }
2821 
2822 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
2823   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
2824   locations->SetInAt(0, Location::RequiresRegister());
2825   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2826 }
2827 
2828 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
2829   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
2830   vixl::aarch64::Register out = OutputRegister(instruction);
2831   {
2832     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2833     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2834     __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
2835     codegen_->MaybeRecordImplicitNullCheck(instruction);
2836   }
2837   // Mask out compression flag from String's array length.
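       // The count field stores (length << 1) | compression_flag, where flag 0 means
       // compressed (8-bit chars) and 1 means uncompressed (16-bit chars), so shifting
       // right by one recovers the character count.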
2838   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
2839     __ Lsr(out.W(), out.W(), 1u);
2840   }
2841 }
2842 
2843 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
2844   DataType::Type value_type = instruction->GetComponentType();
2845 
2846   bool needs_type_check = instruction->NeedsTypeCheck();
2847   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
2848       instruction,
2849       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
2850   locations->SetInAt(0, Location::RequiresRegister());
2851   locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetIndex()));
2852   HInstruction* value = instruction->GetValue();
2853   if (IsZeroBitPattern(value)) {
2854     locations->SetInAt(2, Location::ConstantLocation(value));
2855   } else if (DataType::IsFloatingPointType(value_type)) {
2856     locations->SetInAt(2, Location::RequiresFpuRegister());
2857   } else {
2858     locations->SetInAt(2, Location::RequiresRegister());
2859   }
2860 }
2861 
2862 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
2863   DataType::Type value_type = instruction->GetComponentType();
2864   LocationSummary* locations = instruction->GetLocations();
2865   bool needs_type_check = instruction->NeedsTypeCheck();
2866   bool needs_write_barrier =
2867       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
2868 
2869   Register array = InputRegisterAt(instruction, 0);
2870   CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2);
2871   CPURegister source = value;
2872   Location index = locations->InAt(1);
2873   size_t offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
2874   MemOperand destination = HeapOperand(array);
2875   MacroAssembler* masm = GetVIXLAssembler();
2876 
2877   if (!needs_write_barrier) {
2878     DCHECK(!needs_type_check);
2879     if (index.IsConstant()) {
2880       offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
2881       destination = HeapOperand(array, offset);
2882     } else {
2883       UseScratchRegisterScope temps(masm);
2884       Register temp = temps.AcquireSameSizeAs(array);
2885       if (instruction->GetArray()->IsIntermediateAddress()) {
2886         // We do not need to compute the intermediate address from the array: the
2887         // input instruction has done it already. See the comment in
2888         // `TryExtractArrayAccessAddress()`.
2889         if (kIsDebugBuild) {
2890           HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
2891           DCHECK(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
2892         }
2893         temp = array;
2894       } else {
2895         __ Add(temp, array, offset);
2896       }
2897       destination = HeapOperand(temp,
2898                                 XRegisterFrom(index),
2899                                 LSL,
2900                                 DataType::SizeShift(value_type));
2901     }
2902     {
2903       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2904       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2905       codegen_->Store(value_type, value, destination);
2906       codegen_->MaybeRecordImplicitNullCheck(instruction);
2907     }
2908   } else {
2909     DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2910 
2911     bool can_value_be_null = instruction->GetValueCanBeNull();
2912     vixl::aarch64::Label do_store;
2913     if (can_value_be_null) {
2914       __ Cbz(Register(value), &do_store);
2915     }
2916 
2917     SlowPathCodeARM64* slow_path = nullptr;
2918     if (needs_type_check) {
2919       slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction);
2920       codegen_->AddSlowPath(slow_path);
2921 
2922       const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2923       const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2924       const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2925 
2926       UseScratchRegisterScope temps(masm);
2927       Register temp = temps.AcquireSameSizeAs(array);
2928       Register temp2 = temps.AcquireSameSizeAs(array);
2929 
2930       // Note that when Baker read barriers are enabled, the type
2931       // checks are performed without read barriers.  This is fine,
2932       // even in the case where a class object is in the from-space
2933       // after the flip, as a comparison involving such a type would
2934       // not produce a false positive; it may of course produce a
2935       // false negative, in which case we would take the ArraySet
2936       // slow path.
2937 
2938       // /* HeapReference<Class> */ temp = array->klass_
2939       {
2940         // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2941         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2942         __ Ldr(temp, HeapOperand(array, class_offset));
2943         codegen_->MaybeRecordImplicitNullCheck(instruction);
2944       }
2945       GetAssembler()->MaybeUnpoisonHeapReference(temp);
2946 
2947       // /* HeapReference<Class> */ temp = temp->component_type_
2948       __ Ldr(temp, HeapOperand(temp, component_offset));
2949       // /* HeapReference<Class> */ temp2 = value->klass_
2950       __ Ldr(temp2, HeapOperand(Register(value), class_offset));
2951       // If heap poisoning is enabled, no need to unpoison `temp`
2952       // nor `temp2`, as we are comparing two poisoned references.
2953       __ Cmp(temp, temp2);
2954 
2955       if (instruction->StaticTypeOfArrayIsObjectArray()) {
2956         vixl::aarch64::Label do_put;
2957         __ B(eq, &do_put);
2958         // If heap poisoning is enabled, the `temp` reference has
2959         // not been unpoisoned yet; unpoison it now.
2960         GetAssembler()->MaybeUnpoisonHeapReference(temp);
2961 
2962         // /* HeapReference<Class> */ temp = temp->super_class_
2963         __ Ldr(temp, HeapOperand(temp, super_offset));
2964         // If heap poisoning is enabled, no need to unpoison
2965         // `temp`, as we are comparing against null below.
2966         __ Cbnz(temp, slow_path->GetEntryLabel());
2967         __ Bind(&do_put);
2968       } else {
2969         __ B(ne, slow_path->GetEntryLabel());
2970       }
2971     }
2972 
2973     if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
2974       DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck)
2975           << " Already null checked so we shouldn't do it again.";
2976       codegen_->MarkGCCard(array, value.W(), /* emit_null_check= */ false);
2977     }
2978 
2979     if (can_value_be_null) {
2980       DCHECK(do_store.IsLinked());
2981       __ Bind(&do_store);
2982     }
2983 
2984     UseScratchRegisterScope temps(masm);
2985     if (kPoisonHeapReferences) {
2986       Register temp_source = temps.AcquireSameSizeAs(array);
2987       DCHECK(value.IsW());
2988       __ Mov(temp_source, value.W());
2989       GetAssembler()->PoisonHeapReference(temp_source);
2990       source = temp_source;
2991     }
2992 
2993     if (index.IsConstant()) {
2994       offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
2995       destination = HeapOperand(array, offset);
2996     } else {
2997       Register temp_base = temps.AcquireSameSizeAs(array);
2998       __ Add(temp_base, array, offset);
2999       destination = HeapOperand(temp_base,
3000                                 XRegisterFrom(index),
3001                                 LSL,
3002                                 DataType::SizeShift(value_type));
3003     }
3004 
3005     {
3006       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
3007       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
3008       __ Str(source, destination);
3009 
3010       if (can_value_be_null || !needs_type_check) {
3011         codegen_->MaybeRecordImplicitNullCheck(instruction);
3012       }
3013     }
3014 
3015     if (slow_path != nullptr) {
3016       __ Bind(slow_path->GetExitLabel());
3017     }
3018   }
3019 }
3020 
3021 void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
3022   RegisterSet caller_saves = RegisterSet::Empty();
3023   InvokeRuntimeCallingConvention calling_convention;
3024   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3025   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode()));
3026   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
3027 
3028   // If both index and length are constant, we can check the bounds statically and
3029   // generate code accordingly. We want to make sure we generate constant locations
3030   // in that case, regardless of whether they are encodable in the comparison or not.
3031   HInstruction* index = instruction->InputAt(0);
3032   HInstruction* length = instruction->InputAt(1);
3033   bool both_const = index->IsConstant() && length->IsConstant();
3034   locations->SetInAt(0, both_const
3035       ? Location::ConstantLocation(index)
3036       : ARM64EncodableConstantOrRegister(index, instruction));
3037   locations->SetInAt(1, both_const
3038       ? Location::ConstantLocation(length)
3039       : ARM64EncodableConstantOrRegister(length, instruction));
3040 }
3041 
3042 void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
3043   LocationSummary* locations = instruction->GetLocations();
3044   Location index_loc = locations->InAt(0);
3045   Location length_loc = locations->InAt(1);
3046 
3047   int cmp_first_input = 0;
3048   int cmp_second_input = 1;
3049   Condition cond = hs;
3050 
3051   if (index_loc.IsConstant()) {
3052     int64_t index = Int64FromLocation(index_loc);
3053     if (length_loc.IsConstant()) {
3054       int64_t length = Int64FromLocation(length_loc);
3055       if (index < 0 || index >= length) {
3056         BoundsCheckSlowPathARM64* slow_path =
3057             new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction);
3058         codegen_->AddSlowPath(slow_path);
3059         __ B(slow_path->GetEntryLabel());
3060       } else {
3061         // BCE will remove the bounds check if we are guaranteed to pass.
3062         // However, some optimization after BCE may have generated this, and we should not
3063         // generate a bounds check if it is a valid range.
3064       }
3065       return;
3066     }
3067     // Only the index is constant: change the order of the operands and commute the condition
3068     // so we can use an immediate constant for the index (only the second input to a cmp
3069     // instruction can be an immediate).
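         // For example, with a constant index this emits 'cmp length, #index' and branches to
         // the slow path on 'ls' (length <= index, unsigned), instead of 'cmp index, length'
         // with 'hs'.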
3070     cmp_first_input = 1;
3071     cmp_second_input = 0;
3072     cond = ls;
3073   }
3074   BoundsCheckSlowPathARM64* slow_path =
3075       new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction);
3076   __ Cmp(InputRegisterAt(instruction, cmp_first_input),
3077          InputOperandAt(instruction, cmp_second_input));
3078   codegen_->AddSlowPath(slow_path);
3079   __ B(slow_path->GetEntryLabel(), cond);
3080 }
3081 
3082 void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
3083   LocationSummary* locations =
3084       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
3085   locations->SetInAt(0, Location::RequiresRegister());
3086   if (check->HasUses()) {
3087     locations->SetOut(Location::SameAsFirstInput());
3088   }
3089   // Rely on the type initialization to save everything we need.
3090   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
3091 }
3092 
3093 void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
3094   // We assume the class is not null.
3095   SlowPathCodeARM64* slow_path =
3096       new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(check->GetLoadClass(), check);
3097   codegen_->AddSlowPath(slow_path);
3098   GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
3099 }
3100 
3101 static bool IsFloatingPointZeroConstant(HInstruction* inst) {
3102   return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero()))
3103       || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero()));
3104 }
3105 
3106 void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) {
3107   VRegister lhs_reg = InputFPRegisterAt(instruction, 0);
3108   Location rhs_loc = instruction->GetLocations()->InAt(1);
3109   if (rhs_loc.IsConstant()) {
3110     // 0.0 is the only immediate that can be encoded directly in
3111     // an FCMP instruction.
3112     //
3113     // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
3114     // specify that in a floating-point comparison, positive zero
3115     // and negative zero are considered equal, so we can use the
3116     // literal 0.0 for both cases here.
3117     //
3118     // Note however that some methods (Float.equal, Float.compare,
3119     // Float.compareTo, Double.equal, Double.compare,
3120     // Double.compareTo, Math.max, Math.min, StrictMath.max,
3121     // StrictMath.min) consider 0.0 to be (strictly) greater than
3122     // -0.0. So if we ever translate calls to these methods into a
3123     // HCompare instruction, we must handle the -0.0 case with
3124     // care here.
3125     DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant()));
3126     __ Fcmp(lhs_reg, 0.0);
3127   } else {
3128     __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1));
3129   }
3130 }
3131 
3132 void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
3133   LocationSummary* locations =
3134       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
3135   DataType::Type in_type = compare->InputAt(0)->GetType();
3136   HInstruction* rhs = compare->InputAt(1);
3137   switch (in_type) {
3138     case DataType::Type::kBool:
3139     case DataType::Type::kUint8:
3140     case DataType::Type::kInt8:
3141     case DataType::Type::kUint16:
3142     case DataType::Type::kInt16:
3143     case DataType::Type::kInt32:
3144     case DataType::Type::kInt64: {
3145       locations->SetInAt(0, Location::RequiresRegister());
3146       locations->SetInAt(1, ARM64EncodableConstantOrRegister(rhs, compare));
3147       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3148       break;
3149     }
3150     case DataType::Type::kFloat32:
3151     case DataType::Type::kFloat64: {
3152       locations->SetInAt(0, Location::RequiresFpuRegister());
3153       locations->SetInAt(1,
3154                          IsFloatingPointZeroConstant(rhs)
3155                              ? Location::ConstantLocation(rhs)
3156                              : Location::RequiresFpuRegister());
3157       locations->SetOut(Location::RequiresRegister());
3158       break;
3159     }
3160     default:
3161       LOG(FATAL) << "Unexpected type for compare operation " << in_type;
3162   }
3163 }
3164 
3165 void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
3166   DataType::Type in_type = compare->InputAt(0)->GetType();
3167 
3168   //  0 if: left == right
3169   //  1 if: left  > right
3170   // -1 if: left  < right
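       // For example, left = 3, right = 5: Cmp sets 'lt'; Cset(ne) yields 1 since the values
       // differ; Cneg(lt) then negates it, producing -1.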
3171   switch (in_type) {
3172     case DataType::Type::kBool:
3173     case DataType::Type::kUint8:
3174     case DataType::Type::kInt8:
3175     case DataType::Type::kUint16:
3176     case DataType::Type::kInt16:
3177     case DataType::Type::kInt32:
3178     case DataType::Type::kInt64: {
3179       Register result = OutputRegister(compare);
3180       Register left = InputRegisterAt(compare, 0);
3181       Operand right = InputOperandAt(compare, 1);
3182       __ Cmp(left, right);
3183       __ Cset(result, ne);          // result == +1 if NE or 0 otherwise
3184       __ Cneg(result, result, lt);  // result == -1 if LT or unchanged otherwise
3185       break;
3186     }
3187     case DataType::Type::kFloat32:
3188     case DataType::Type::kFloat64: {
3189       Register result = OutputRegister(compare);
3190       GenerateFcmp(compare);
3191       __ Cset(result, ne);
3192       __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
3193       break;
3194     }
3195     default:
3196       LOG(FATAL) << "Unimplemented compare type " << in_type;
3197   }
3198 }
3199 
3200 void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
3201   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
3202 
3203   HInstruction* rhs = instruction->InputAt(1);
3204   if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3205     locations->SetInAt(0, Location::RequiresFpuRegister());
3206     locations->SetInAt(1,
3207                        IsFloatingPointZeroConstant(rhs)
3208                            ? Location::ConstantLocation(rhs)
3209                            : Location::RequiresFpuRegister());
3210   } else {
3211     // Integer cases.
3212     locations->SetInAt(0, Location::RequiresRegister());
3213     locations->SetInAt(1, ARM64EncodableConstantOrRegister(rhs, instruction));
3214   }
3215 
3216   if (!instruction->IsEmittedAtUseSite()) {
3217     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3218   }
3219 }
3220 
3221 void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
3222   if (instruction->IsEmittedAtUseSite()) {
3223     return;
3224   }
3225 
3226   LocationSummary* locations = instruction->GetLocations();
3227   Register res = RegisterFrom(locations->Out(), instruction->GetType());
3228   IfCondition if_cond = instruction->GetCondition();
3229 
3230   if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3231     GenerateFcmp(instruction);
3232     __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
3233   } else {
3234     // Integer cases.
3235     Register lhs = InputRegisterAt(instruction, 0);
3236     Operand rhs = InputOperandAt(instruction, 1);
3237     __ Cmp(lhs, rhs);
3238     __ Cset(res, ARM64Condition(if_cond));
3239   }
3240 }
3241 
3242 #define FOR_EACH_CONDITION_INSTRUCTION(M)                                                \
3243   M(Equal)                                                                               \
3244   M(NotEqual)                                                                            \
3245   M(LessThan)                                                                            \
3246   M(LessThanOrEqual)                                                                     \
3247   M(GreaterThan)                                                                         \
3248   M(GreaterThanOrEqual)                                                                  \
3249   M(Below)                                                                               \
3250   M(BelowOrEqual)                                                                        \
3251   M(Above)                                                                               \
3252   M(AboveOrEqual)
3253 #define DEFINE_CONDITION_VISITORS(Name)                                                  \
3254 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }         \
3255 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
3256 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
3257 #undef DEFINE_CONDITION_VISITORS
3258 #undef FOR_EACH_CONDITION_INSTRUCTION
3259 
3260 void InstructionCodeGeneratorARM64::GenerateIntDivForPower2Denom(HDiv* instruction) {
3261   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
3262   uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
3263   DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;
3264 
3265   Register out = OutputRegister(instruction);
3266   Register dividend = InputRegisterAt(instruction, 0);
3267 
3268   Register final_dividend;
3269   if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
3270     // No need to adjust the result for non-negative dividends or the INT32_MIN/INT64_MIN dividends.
3271     // NOTE: The generated code for HDiv correctly works for the INT32_MIN/INT64_MIN dividends:
3272     //   imm == 2
3273     //     add out, dividend(0x80000000), dividend(0x80000000), lsr #31 => out = 0x80000001
3274     //     asr out, out(0x80000001), #1 => out = 0xc0000000
3275     //     This is the same as 'asr out, 0x80000000, #1'
3276     //
3277     //   imm > 2
3278     //     add temp, dividend(0x80000000), imm - 1 => temp = 0b10..01..1, where the number
3279     //         of the rightmost 1s is ctz_imm.
3280     //     cmp dividend(0x80000000), 0 => N = 1, V = 0 (lt is true)
3281     //     csel out, temp(0b10..01..1), dividend(0x80000000), lt => out = 0b10..01..1
3282     //     asr out, out(0b10..01..1), #ctz_imm => out = 0b1..10..0, where the number of the
3283     //         leftmost 1s is ctz_imm + 1.
3284     //     This is the same as 'asr out, dividend(0x80000000), #ctz_imm'.
3285     //
3286     //   imm == INT32_MIN
3287     //     add tmp, dividend(0x80000000), #0x7fffffff => tmp = -1
3288     //     cmp dividend(0x80000000), 0 => N = 1, V = 0 (lt is true)
3289     //     csel out, temp(-1), dividend(0x80000000), lt => out = -1
3290     //     neg out, out(-1), asr #31 => out = 1
3291     //     This is the same as 'neg out, dividend(0x80000000), asr #31'.
3292     final_dividend = dividend;
3293   } else {
3294     if (abs_imm == 2) {
3295       int bits = DataType::Size(instruction->GetResultType()) * kBitsPerByte;
3296       __ Add(out, dividend, Operand(dividend, LSR, bits - 1));
3297     } else {
3298       UseScratchRegisterScope temps(GetVIXLAssembler());
3299       Register temp = temps.AcquireSameSizeAs(out);
3300       __ Add(temp, dividend, abs_imm - 1);
3301       __ Cmp(dividend, 0);
3302       __ Csel(out, temp, dividend, lt);
3303     }
3304     final_dividend = out;
3305   }
3306 
3307   int ctz_imm = CTZ(abs_imm);
3308   if (imm > 0) {
3309     __ Asr(out, final_dividend, ctz_imm);
3310   } else {
3311     __ Neg(out, Operand(final_dividend, ASR, ctz_imm));
3312   }
3313 }
3314 
3315 // Return true if the magic number was modified by subtracting 2^32 (Int32 div) or 2^64 (Int64 div).
3316 // In that case the dividend needs to be added to the result.
3317 static inline bool NeedToAddDividend(int64_t magic_number, int64_t divisor) {
3318   return divisor > 0 && magic_number < 0;
3319 }
3320 
3321 // Return true if the magic number was modified by adding 2^32 (Int32 div) or 2^64 (Int64 div).
3322 // In that case the dividend needs to be subtracted from the result.
3323 static inline bool NeedToSubDividend(int64_t magic_number, int64_t divisor) {
3324   return divisor < 0 && magic_number > 0;
3325 }
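     // Illustrative example, assuming the usual magic-number construction (Hacker's Delight):
     // for an Int32 division by 7 the magic constant is 0x92492493 (negative) with shift 2, so
     // the dividend is added back; for division by -7 it is 0x6DB6DB6D (positive) with shift 2,
     // so the dividend is subtracted.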
3326 
3327 // Generate code which increments the value in register 'in' by 1 if the value is negative.
3328 // It is done with 'add out, in, in, lsr #31 or #63'.
3329 // If the value is a result of an operation setting the N flag, CINC MI can be used
3330 // instead of ADD. 'use_cond_inc' controls this.
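     // This is the final 'quotient += (quotient >>> (width - 1))' step of the magic-number
     // division: adding the unsigned sign bit adds 1 exactly when the value is negative.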
3331 void InstructionCodeGeneratorARM64::GenerateIncrementNegativeByOne(
3332     Register out,
3333     Register in,
3334     bool use_cond_inc) {
3335   if (use_cond_inc) {
3336     __ Cinc(out, in, mi);
3337   } else {
3338     __ Add(out, in, Operand(in, LSR, in.GetSizeInBits() - 1));
3339   }
3340 }
3341 
3342 // Helper to generate code producing the result of HRem with a constant divisor.
3343 void InstructionCodeGeneratorARM64::GenerateResultRemWithAnyConstant(
3344     Register out,
3345     Register dividend,
3346     Register quotient,
3347     int64_t divisor,
3348     UseScratchRegisterScope* temps_scope) {
3349   Register temp_imm = temps_scope->AcquireSameSizeAs(out);
3350   __ Mov(temp_imm, divisor);
3351   __ Msub(out, quotient, temp_imm, dividend);
3352 }
3353 
3354 // Helper to generate code for HDiv/HRem instructions when a dividend is non-negative and
3355 // a divisor is a positive constant that is not a power of 2.
3356 void InstructionCodeGeneratorARM64::GenerateInt64UnsignedDivRemWithAnyPositiveConstant(
3357     HBinaryOperation* instruction) {
3358   DCHECK(instruction->IsDiv() || instruction->IsRem());
3359   DCHECK(instruction->GetResultType() == DataType::Type::kInt64);
3360 
3361   LocationSummary* locations = instruction->GetLocations();
3362   Location second = locations->InAt(1);
3363   DCHECK(second.IsConstant());
3364 
3365   Register out = OutputRegister(instruction);
3366   Register dividend = InputRegisterAt(instruction, 0);
3367   int64_t imm = Int64FromConstant(second.GetConstant());
3368   DCHECK_GT(imm, 0);
3369 
3370   int64_t magic;
3371   int shift;
3372   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ true, &magic, &shift);
3373 
3374   UseScratchRegisterScope temps(GetVIXLAssembler());
3375   Register temp = temps.AcquireSameSizeAs(out);
3376 
3377   auto generate_unsigned_div_code = [this, magic, shift](Register out,
3378                                                          Register dividend,
3379                                                          Register temp) {
3380     // temp = get_high(dividend * magic)
3381     __ Mov(temp, magic);
3382     if (magic > 0 && shift == 0) {
3383       __ Smulh(out, dividend, temp);
3384     } else {
3385       __ Smulh(temp, dividend, temp);
3386       if (magic < 0) {
3387         // The negative magic means that the multiplier m is greater than INT64_MAX.
3388         // In such a case shift is never 0. See the proof in
3389         // InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant.
3390         __ Add(temp, temp, dividend);
3391       }
3392       DCHECK_NE(shift, 0);
3393       __ Lsr(out, temp, shift);
3394     }
3395   };
3396 
3397   if (instruction->IsDiv()) {
3398     generate_unsigned_div_code(out, dividend, temp);
3399   } else {
3400     generate_unsigned_div_code(temp, dividend, temp);
3401     GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3402   }
3403 }
3404 
3405 // Helper to generate code for HDiv/HRem instructions for any dividend and a constant divisor
3406 // (not a power of 2).
3407 void InstructionCodeGeneratorARM64::GenerateInt64DivRemWithAnyConstant(
3408     HBinaryOperation* instruction) {
3409   DCHECK(instruction->IsDiv() || instruction->IsRem());
3410   DCHECK(instruction->GetResultType() == DataType::Type::kInt64);
3411 
3412   LocationSummary* locations = instruction->GetLocations();
3413   Location second = locations->InAt(1);
3414   DCHECK(second.IsConstant());
3415 
3416   Register out = OutputRegister(instruction);
3417   Register dividend = InputRegisterAt(instruction, 0);
3418   int64_t imm = Int64FromConstant(second.GetConstant());
3419 
3420   int64_t magic;
3421   int shift;
3422   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ true, &magic, &shift);
3423 
3424   UseScratchRegisterScope temps(GetVIXLAssembler());
3425   Register temp = temps.AcquireSameSizeAs(out);
3426 
3427   // temp = get_high(dividend * magic)
3428   __ Mov(temp, magic);
3429   __ Smulh(temp, dividend, temp);
3430 
3431   // The multiplication result might need some corrections to be finalized.
3432   // The last correction is to increment by 1, if the result is negative.
3433   // Currently it is done with 'add result, temp_result, temp_result, lsr #31 or #63'.
3434   // Such ADD usually has latency 2, e.g. on Cortex-A55.
3435   // However if one of the corrections is ADD or SUB, the sign can be detected
3436   // with ADDS/SUBS. They set the N flag if the result is negative.
3437   // This allows using CINC MI, which has latency 1.
3438   bool use_cond_inc = false;
3439 
3440   // Some combinations of magic_number and the divisor require a correction of the result.
3441   // Check whether the correction is needed.
3442   if (NeedToAddDividend(magic, imm)) {
3443     __ Adds(temp, temp, dividend);
3444     use_cond_inc = true;
3445   } else if (NeedToSubDividend(magic, imm)) {
3446     __ Subs(temp, temp, dividend);
3447     use_cond_inc = true;
3448   }
3449 
3450   if (shift != 0) {
3451     __ Asr(temp, temp, shift);
3452   }
3453 
3454   if (instruction->IsRem()) {
3455     GenerateIncrementNegativeByOne(temp, temp, use_cond_inc);
3456     GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3457   } else {
3458     GenerateIncrementNegativeByOne(out, temp, use_cond_inc);
3459   }
3460 }
3461 
3462 void InstructionCodeGeneratorARM64::GenerateInt32DivRemWithAnyConstant(
3463     HBinaryOperation* instruction) {
3464   DCHECK(instruction->IsDiv() || instruction->IsRem());
3465   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
3466 
3467   LocationSummary* locations = instruction->GetLocations();
3468   Location second = locations->InAt(1);
3469   DCHECK(second.IsConstant());
3470 
3471   Register out = OutputRegister(instruction);
3472   Register dividend = InputRegisterAt(instruction, 0);
3473   int64_t imm = Int64FromConstant(second.GetConstant());
3474 
3475   int64_t magic;
3476   int shift;
3477   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
3478   UseScratchRegisterScope temps(GetVIXLAssembler());
3479   Register temp = temps.AcquireSameSizeAs(out);
3480 
3481   // temp = get_high(dividend * magic)
3482   __ Mov(temp, magic);
3483   __ Smull(temp.X(), dividend, temp);
3484 
3485   // The multiplication result might need some corrections to be finalized.
3486   // The last correction is to increment by 1, if the result is negative.
3487   // Currently it is done with 'add result, temp_result, temp_result, lsr #31 or #63'.
3488   // Such ADD usually has latency 2, e.g. on Cortex-A55.
3489   // However if one of the corrections is ADD or SUB, the sign can be detected
3490   // with ADDS/SUBS. They set the N flag if the result is negative.
3491   // This allows using CINC MI, which has latency 1.
3492   bool use_cond_inc = false;
3493 
3494   // The ADD/SUB correction is performed in the high 32 bits of the 64-bit temporary,
3495   // where the high-multiply result lives; the low 32 bits are ignored for kInt32.
3496   if (NeedToAddDividend(magic, imm)) {
3497     __ Adds(temp.X(), temp.X(), Operand(dividend.X(), LSL, 32));
3498     use_cond_inc = true;
3499   } else if (NeedToSubDividend(magic, imm)) {
3500     __ Subs(temp.X(), temp.X(), Operand(dividend.X(), LSL, 32));
3501     use_cond_inc = true;
3502   }
3503 
3504   // Extract the result from the high 32 bits and apply the final right shift.
3505   DCHECK_LT(shift, 32);
3506   if (imm > 0 && HasNonNegativeInputAt(instruction, 0)) {
3507     // No need to adjust the result for a non-negative dividend and a positive divisor.
3508     if (instruction->IsDiv()) {
3509       __ Lsr(out.X(), temp.X(), 32 + shift);
3510     } else {
3511       __ Lsr(temp.X(), temp.X(), 32 + shift);
3512       GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3513     }
3514   } else {
3515     __ Asr(temp.X(), temp.X(), 32 + shift);
3516 
3517     if (instruction->IsRem()) {
3518       GenerateIncrementNegativeByOne(temp, temp, use_cond_inc);
3519       GenerateResultRemWithAnyConstant(out, dividend, temp, imm, &temps);
3520     } else {
3521       GenerateIncrementNegativeByOne(out, temp, use_cond_inc);
3522     }
3523   }
3524 }
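
// For illustration: a minimal sketch of the general path above, assuming the standard 32-bit
// Hacker's Delight constants for divisor 7, i.e. magic = 0x92492493 and shift = 2 (w16/x16
// stand in for the scratch register, w0 for the dividend and the quotient):
//
//   mov   w16, #0x92492493
//   smull x16, w0, w16            // 64-bit product; the high-multiply lives in bits 63:32
//   adds  x16, x16, x0, lsl #32   // magic is negative, so add the dividend to the high half
//   asr   x16, x16, #34           // 32 + shift
//   cinc  w0, w16, mi             // +1 if negative, using the N flag set by the ADDS
//
// Worked example: dividend -100 gives a high-multiply of 42, the ADDS turns the high half
// into -58, the shift yields -15, and CINC MI corrects it to -14 = -100 / 7 (truncated).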
3525 
3526 void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction,
3527                                                                   int64_t divisor) {
3528   DCHECK(instruction->IsDiv() || instruction->IsRem());
3529   if (instruction->GetResultType() == DataType::Type::kInt64) {
3530     if (divisor > 0 && HasNonNegativeInputAt(instruction, 0)) {
3531       GenerateInt64UnsignedDivRemWithAnyPositiveConstant(instruction);
3532     } else {
3533       GenerateInt64DivRemWithAnyConstant(instruction);
3534     }
3535   } else {
3536     GenerateInt32DivRemWithAnyConstant(instruction);
3537   }
3538 }
3539 
3540 void InstructionCodeGeneratorARM64::GenerateIntDivForConstDenom(HDiv *instruction) {
3541   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
3542 
3543   if (imm == 0) {
3544     // Do not generate anything. DivZeroCheck would prevent any code from being executed.
3545     return;
3546   }
3547 
3548   if (IsPowerOfTwo(AbsOrMin(imm))) {
3549     GenerateIntDivForPower2Denom(instruction);
3550   } else {
3551     // Cases imm == -1 or imm == 1 are handled by InstructionSimplifier.
3552     DCHECK(imm < -2 || imm > 2) << imm;
3553     GenerateDivRemWithAnyConstant(instruction, imm);
3554   }
3555 }
3556 
3557 void InstructionCodeGeneratorARM64::GenerateIntDiv(HDiv *instruction) {
3558   DCHECK(DataType::IsIntOrLongType(instruction->GetResultType()))
3559        << instruction->GetResultType();
3560 
3561   if (instruction->GetLocations()->InAt(1).IsConstant()) {
3562     GenerateIntDivForConstDenom(instruction);
3563   } else {
3564     Register out = OutputRegister(instruction);
3565     Register dividend = InputRegisterAt(instruction, 0);
3566     Register divisor = InputRegisterAt(instruction, 1);
3567     __ Sdiv(out, dividend, divisor);
3568   }
3569 }
3570 
3571 void LocationsBuilderARM64::VisitDiv(HDiv* div) {
3572   LocationSummary* locations =
3573       new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
3574   switch (div->GetResultType()) {
3575     case DataType::Type::kInt32:
3576     case DataType::Type::kInt64:
3577       locations->SetInAt(0, Location::RequiresRegister());
3578       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3579       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3580       break;
3581 
3582     case DataType::Type::kFloat32:
3583     case DataType::Type::kFloat64:
3584       locations->SetInAt(0, Location::RequiresFpuRegister());
3585       locations->SetInAt(1, Location::RequiresFpuRegister());
3586       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3587       break;
3588 
3589     default:
3590       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3591   }
3592 }
3593 
3594 void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
3595   DataType::Type type = div->GetResultType();
3596   switch (type) {
3597     case DataType::Type::kInt32:
3598     case DataType::Type::kInt64:
3599       GenerateIntDiv(div);
3600       break;
3601 
3602     case DataType::Type::kFloat32:
3603     case DataType::Type::kFloat64:
3604       __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1));
3605       break;
3606 
3607     default:
3608       LOG(FATAL) << "Unexpected div type " << type;
3609   }
3610 }
3611 
3612 void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3613   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3614   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
3615 }
3616 
3617 void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3618   SlowPathCodeARM64* slow_path =
3619       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARM64(instruction);
3620   codegen_->AddSlowPath(slow_path);
3621   Location value = instruction->GetLocations()->InAt(0);
3622 
3623   DataType::Type type = instruction->GetType();
3624 
3625   if (!DataType::IsIntegralType(type)) {
3626     LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
3627     UNREACHABLE();
3628   }
3629 
3630   if (value.IsConstant()) {
3631     int64_t divisor = Int64FromLocation(value);
3632     if (divisor == 0) {
3633       __ B(slow_path->GetEntryLabel());
3634     } else {
3635       // A division by a non-zero constant is valid. We don't need to perform
3636       // any check, so simply fall through.
3637     }
3638   } else {
3639     __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
3640   }
3641 }
3642 
3643 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
3644   LocationSummary* locations =
3645       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3646   locations->SetOut(Location::ConstantLocation(constant));
3647 }
3648 
3649 void InstructionCodeGeneratorARM64::VisitDoubleConstant(
3650     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
3651   // Will be generated at use site.
3652 }
3653 
3654 void LocationsBuilderARM64::VisitExit(HExit* exit) {
3655   exit->SetLocations(nullptr);
3656 }
3657 
3658 void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
3659 }
3660 
3661 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
3662   LocationSummary* locations =
3663       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3664   locations->SetOut(Location::ConstantLocation(constant));
3665 }
3666 
3667 void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
3668   // Will be generated at use site.
3669 }
3670 
3671 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
3672   if (successor->IsExitBlock()) {
3673     DCHECK(got->GetPrevious()->AlwaysThrows());
3674     return;  // no code needed
3675   }
3676 
3677   HBasicBlock* block = got->GetBlock();
3678   HInstruction* previous = got->GetPrevious();
3679   HLoopInformation* info = block->GetLoopInformation();
3680 
3681   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
3682     codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
3683     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
3684     return;  // `GenerateSuspendCheck()` emitted the jump.
3685   }
3686   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
3687     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
3688     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
3689   }
3690   if (!codegen_->GoesToNextBlock(block, successor)) {
3691     __ B(codegen_->GetLabelOf(successor));
3692   }
3693 }
3694 
3695 void LocationsBuilderARM64::VisitGoto(HGoto* got) {
3696   got->SetLocations(nullptr);
3697 }
3698 
3699 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
3700   HandleGoto(got, got->GetSuccessor());
3701 }
3702 
3703 void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3704   try_boundary->SetLocations(nullptr);
3705 }
3706 
3707 void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3708   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
3709   if (!successor->IsExitBlock()) {
3710     HandleGoto(try_boundary, successor);
3711   }
3712 }
3713 
3714 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
3715                                                           size_t condition_input_index,
3716                                                           vixl::aarch64::Label* true_target,
3717                                                           vixl::aarch64::Label* false_target) {
3718   HInstruction* cond = instruction->InputAt(condition_input_index);
3719 
3720   if (true_target == nullptr && false_target == nullptr) {
3721     // Nothing to do. The code always falls through.
3722     return;
3723   } else if (cond->IsIntConstant()) {
3724     // Constant condition, statically compared against "true" (integer value 1).
3725     if (cond->AsIntConstant()->IsTrue()) {
3726       if (true_target != nullptr) {
3727         __ B(true_target);
3728       }
3729     } else {
3730       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
3731       if (false_target != nullptr) {
3732         __ B(false_target);
3733       }
3734     }
3735     return;
3736   }
3737 
3738   // The following code generates these patterns:
3739   //  (1) true_target == nullptr && false_target != nullptr
3740   //        - opposite condition true => branch to false_target
3741   //  (2) true_target != nullptr && false_target == nullptr
3742   //        - condition true => branch to true_target
3743   //  (3) true_target != nullptr && false_target != nullptr
3744   //        - condition true => branch to true_target
3745   //        - branch to false_target
3746   if (IsBooleanValueOrMaterializedCondition(cond)) {
3747     // The condition instruction has been materialized, compare the output to 0.
3748     Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
3749     DCHECK(cond_val.IsRegister());
3750     if (true_target == nullptr) {
3751       __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
3752     } else {
3753       __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
3754     }
3755   } else {
3756     // The condition instruction has not been materialized, use its inputs as
3757     // the comparison and its condition as the branch condition.
3758     HCondition* condition = cond->AsCondition();
3759 
3760     DataType::Type type = condition->InputAt(0)->GetType();
3761     if (DataType::IsFloatingPointType(type)) {
3762       GenerateFcmp(condition);
3763       if (true_target == nullptr) {
3764         IfCondition opposite_condition = condition->GetOppositeCondition();
3765         __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
3766       } else {
3767         __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
3768       }
3769     } else {
3770       // Integer cases.
3771       Register lhs = InputRegisterAt(condition, 0);
3772       Operand rhs = InputOperandAt(condition, 1);
3773 
3774       Condition arm64_cond;
3775       vixl::aarch64::Label* non_fallthrough_target;
3776       if (true_target == nullptr) {
3777         arm64_cond = ARM64Condition(condition->GetOppositeCondition());
3778         non_fallthrough_target = false_target;
3779       } else {
3780         arm64_cond = ARM64Condition(condition->GetCondition());
3781         non_fallthrough_target = true_target;
3782       }
3783 
3784       if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
3785           rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
3786         switch (arm64_cond) {
3787           case eq:
3788             __ Cbz(lhs, non_fallthrough_target);
3789             break;
3790           case ne:
3791             __ Cbnz(lhs, non_fallthrough_target);
3792             break;
3793           case lt:
3794             // Test the sign bit and branch accordingly.
3795             __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3796             break;
3797           case ge:
3798             // Test the sign bit and branch accordingly.
3799             __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3800             break;
3801           default:
3802             // Without the `static_cast` the compiler throws an error for
3803             // `-Werror=sign-promo`.
3804             LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
3805         }
3806       } else {
3807         __ Cmp(lhs, rhs);
3808         __ B(arm64_cond, non_fallthrough_target);
3809       }
3810     }
3811   }
3812 
3813   // If neither branch falls through (case 3), the conditional branch to `true_target`
3814   // was already emitted (case 2) and we need to emit a jump to `false_target`.
3815   if (true_target != nullptr && false_target != nullptr) {
3816     __ B(false_target);
3817   }
3818 }
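
// For illustration: two typical emissions from the code above (registers and labels are
// placeholders). A non-materialized `x < 0` whose false successor is the fallthrough block
// uses the compare-against-zero fast path:
//
//   tbnz w0, #31, <true_target>   // branch on the sign bit
//
// while a general comparison such as `x <= y` falls back to an explicit compare and branch:
//
//   cmp  w0, w1
//   b.le <true_target>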
3819 
3820 void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
3821   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
3822   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3823     locations->SetInAt(0, Location::RequiresRegister());
3824   }
3825 }
3826 
3827 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
3828   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
3829   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
3830   vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
3831   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
3832     true_target = nullptr;
3833   }
3834   vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
3835   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
3836     false_target = nullptr;
3837   }
3838   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
3839 }
3840 
3841 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3842   LocationSummary* locations = new (GetGraph()->GetAllocator())
3843       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
3844   InvokeRuntimeCallingConvention calling_convention;
3845   RegisterSet caller_saves = RegisterSet::Empty();
3846   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3847   locations->SetCustomSlowPathCallerSaves(caller_saves);
3848   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
3849     locations->SetInAt(0, Location::RequiresRegister());
3850   }
3851 }
3852 
3853 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3854   SlowPathCodeARM64* slow_path =
3855       deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
3856   GenerateTestAndBranch(deoptimize,
3857                         /* condition_input_index= */ 0,
3858                         slow_path->GetEntryLabel(),
3859                         /* false_target= */ nullptr);
3860 }
3861 
3862 void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3863   LocationSummary* locations = new (GetGraph()->GetAllocator())
3864       LocationSummary(flag, LocationSummary::kNoCall);
3865   locations->SetOut(Location::RequiresRegister());
3866 }
3867 
3868 void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3869   __ Ldr(OutputRegister(flag),
3870          MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
3871 }
3872 
3873 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
3874   return condition->IsCondition() &&
3875          DataType::IsFloatingPointType(condition->InputAt(0)->GetType());
3876 }
3877 
3878 static inline Condition GetConditionForSelect(HCondition* condition) {
3879   IfCondition cond = condition->AsCondition()->GetCondition();
3880   return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
3881                                                      : ARM64Condition(cond);
3882 }
3883 
3884 void LocationsBuilderARM64::VisitSelect(HSelect* select) {
3885   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
3886   if (DataType::IsFloatingPointType(select->GetType())) {
3887     locations->SetInAt(0, Location::RequiresFpuRegister());
3888     locations->SetInAt(1, Location::RequiresFpuRegister());
3889     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3890   } else {
3891     HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
3892     HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
3893     bool is_true_value_constant = cst_true_value != nullptr;
3894     bool is_false_value_constant = cst_false_value != nullptr;
3895     // Ask VIXL whether we should synthesize constants in registers.
3896     // We give an arbitrary register to VIXL when dealing with non-constant inputs.
3897     Operand true_op = is_true_value_constant ?
3898         Operand(Int64FromConstant(cst_true_value)) : Operand(x1);
3899     Operand false_op = is_false_value_constant ?
3900         Operand(Int64FromConstant(cst_false_value)) : Operand(x2);
3901     bool true_value_in_register = false;
3902     bool false_value_in_register = false;
3903     MacroAssembler::GetCselSynthesisInformation(
3904         x0, true_op, false_op, &true_value_in_register, &false_value_in_register);
3905     true_value_in_register |= !is_true_value_constant;
3906     false_value_in_register |= !is_false_value_constant;
3907 
3908     locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister()
3909                                                  : Location::ConstantLocation(cst_true_value));
3910     locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
3911                                                   : Location::ConstantLocation(cst_false_value));
3912     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3913   }
3914 
3915   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
3916     locations->SetInAt(2, Location::RequiresRegister());
3917   }
3918 }
3919 
3920 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
3921   HInstruction* cond = select->GetCondition();
3922   Condition csel_cond;
3923 
3924   if (IsBooleanValueOrMaterializedCondition(cond)) {
3925     if (cond->IsCondition() && cond->GetNext() == select) {
3926       // Use the condition flags set by the previous instruction.
3927       csel_cond = GetConditionForSelect(cond->AsCondition());
3928     } else {
3929       __ Cmp(InputRegisterAt(select, 2), 0);
3930       csel_cond = ne;
3931     }
3932   } else if (IsConditionOnFloatingPointValues(cond)) {
3933     GenerateFcmp(cond);
3934     csel_cond = GetConditionForSelect(cond->AsCondition());
3935   } else {
3936     __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
3937     csel_cond = GetConditionForSelect(cond->AsCondition());
3938   }
3939 
3940   if (DataType::IsFloatingPointType(select->GetType())) {
3941     __ Fcsel(OutputFPRegister(select),
3942              InputFPRegisterAt(select, 1),
3943              InputFPRegisterAt(select, 0),
3944              csel_cond);
3945   } else {
3946     __ Csel(OutputRegister(select),
3947             InputOperandAt(select, 1),
3948             InputOperandAt(select, 0),
3949             csel_cond);
3950   }
3951 }
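
// For illustration: for an integer select such as `r = (a > b) ? x : y` with a
// non-materialized condition, the code above emits roughly (placeholder registers):
//
//   cmp  w1, w2           // a, b
//   csel w0, w3, w4, gt   // true value, false value
//
// With a materialized condition the CMP is against zero and the CSEL condition is NE,
// unless the flag-setting condition immediately precedes the select, in which case its
// flags are reused directly.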
3952 
3953 void LocationsBuilderARM64::VisitNop(HNop* nop) {
3954   new (GetGraph()->GetAllocator()) LocationSummary(nop);
3955 }
3956 
3957 void InstructionCodeGeneratorARM64::VisitNop(HNop*) {
3958   // The environment recording already happened in CodeGenerator::Compile.
3959 }
3960 
3961 void CodeGeneratorARM64::IncreaseFrame(size_t adjustment) {
3962   __ Claim(adjustment);
3963   GetAssembler()->cfi().AdjustCFAOffset(adjustment);
3964 }
3965 
3966 void CodeGeneratorARM64::DecreaseFrame(size_t adjustment) {
3967   __ Drop(adjustment);
3968   GetAssembler()->cfi().AdjustCFAOffset(-adjustment);
3969 }
3970 
3971 void CodeGeneratorARM64::GenerateNop() {
3972   __ Nop();
3973 }
3974 
3975 void LocationsBuilderARM64::VisitPredicatedInstanceFieldGet(
3976     HPredicatedInstanceFieldGet* instruction) {
3977   HandleFieldGet(instruction, instruction->GetFieldInfo());
3978 }
3979 
3980 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3981   HandleFieldGet(instruction, instruction->GetFieldInfo());
3982 }
3983 
3984 void InstructionCodeGeneratorARM64::VisitPredicatedInstanceFieldGet(
3985     HPredicatedInstanceFieldGet* instruction) {
3986   vixl::aarch64::Label finish;
3987   __ Cbz(InputRegisterAt(instruction, 1), &finish);
3988   HandleFieldGet(instruction, instruction->GetFieldInfo());
3989   __ Bind(&finish);
3990 }
3991 
3992 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3993   HandleFieldGet(instruction, instruction->GetFieldInfo());
3994 }
3995 
3996 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3997   HandleFieldSet(instruction);
3998 }
3999 
4000 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4001   HandleFieldSet(instruction,
4002                  instruction->GetFieldInfo(),
4003                  instruction->GetValueCanBeNull(),
4004                  instruction->GetWriteBarrierKind());
4005 }
4006 
4007 // Temp is used for read barrier.
4008 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
4009   if (gUseReadBarrier &&
4010       (kUseBakerReadBarrier ||
4011           type_check_kind == TypeCheckKind::kAbstractClassCheck ||
4012           type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
4013           type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
4014     return 1;
4015   }
4016   return 0;
4017 }
4018 
4019 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
4020 // interface pointer, one for loading the current interface.
4021 // The other checks have one temp for loading the object's class.
4022 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
4023   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
4024     return 3;
4025   }
4026   return 1 + NumberOfInstanceOfTemps(type_check_kind);
4027 }
4028 
4029 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
4030   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
4031   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4032   bool baker_read_barrier_slow_path = false;
4033   switch (type_check_kind) {
4034     case TypeCheckKind::kExactCheck:
4035     case TypeCheckKind::kAbstractClassCheck:
4036     case TypeCheckKind::kClassHierarchyCheck:
4037     case TypeCheckKind::kArrayObjectCheck: {
4038       bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
4039       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
4040       baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
4041       break;
4042     }
4043     case TypeCheckKind::kArrayCheck:
4044     case TypeCheckKind::kUnresolvedCheck:
4045     case TypeCheckKind::kInterfaceCheck:
4046       call_kind = LocationSummary::kCallOnSlowPath;
4047       break;
4048     case TypeCheckKind::kBitstringCheck:
4049       break;
4050   }
4051 
4052   LocationSummary* locations =
4053       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
4054   if (baker_read_barrier_slow_path) {
4055     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
4056   }
4057   locations->SetInAt(0, Location::RequiresRegister());
4058   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
4059     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
4060     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
4061     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
4062   } else {
4063     locations->SetInAt(1, Location::RequiresRegister());
4064   }
4065   // The "out" register is used as a temporary, so it overlaps with the inputs.
4066   // Note that TypeCheckSlowPathARM64 uses this register too.
4067   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4068   // Add temps if necessary for read barriers.
4069   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
4070 }
4071 
4072 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
4073   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4074   LocationSummary* locations = instruction->GetLocations();
4075   Location obj_loc = locations->InAt(0);
4076   Register obj = InputRegisterAt(instruction, 0);
4077   Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
4078       ? Register()
4079       : InputRegisterAt(instruction, 1);
4080   Location out_loc = locations->Out();
4081   Register out = OutputRegister(instruction);
4082   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
4083   DCHECK_LE(num_temps, 1u);
4084   Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
4085   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4086   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4087   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4088   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
4089 
4090   vixl::aarch64::Label done, zero;
4091   SlowPathCodeARM64* slow_path = nullptr;
4092 
4093   // Return 0 if `obj` is null.
4094   // Avoid null check if we know `obj` is not null.
4095   if (instruction->MustDoNullCheck()) {
4096     __ Cbz(obj, &zero);
4097   }
4098 
4099   switch (type_check_kind) {
4100     case TypeCheckKind::kExactCheck: {
4101       ReadBarrierOption read_barrier_option =
4102           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
4103       // /* HeapReference<Class> */ out = obj->klass_
4104       GenerateReferenceLoadTwoRegisters(instruction,
4105                                         out_loc,
4106                                         obj_loc,
4107                                         class_offset,
4108                                         maybe_temp_loc,
4109                                         read_barrier_option);
4110       __ Cmp(out, cls);
4111       __ Cset(out, eq);
4112       if (zero.IsLinked()) {
4113         __ B(&done);
4114       }
4115       break;
4116     }
4117 
4118     case TypeCheckKind::kAbstractClassCheck: {
4119       ReadBarrierOption read_barrier_option =
4120           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
4121       // /* HeapReference<Class> */ out = obj->klass_
4122       GenerateReferenceLoadTwoRegisters(instruction,
4123                                         out_loc,
4124                                         obj_loc,
4125                                         class_offset,
4126                                         maybe_temp_loc,
4127                                         read_barrier_option);
4128       // If the class is abstract, we eagerly fetch the super class of the
4129       // object to avoid doing a comparison we know will fail.
4130       vixl::aarch64::Label loop, success;
4131       __ Bind(&loop);
4132       // /* HeapReference<Class> */ out = out->super_class_
4133       GenerateReferenceLoadOneRegister(instruction,
4134                                        out_loc,
4135                                        super_offset,
4136                                        maybe_temp_loc,
4137                                        read_barrier_option);
4138       // If `out` is null, we use it for the result, and jump to `done`.
4139       __ Cbz(out, &done);
4140       __ Cmp(out, cls);
4141       __ B(ne, &loop);
4142       __ Mov(out, 1);
4143       if (zero.IsLinked()) {
4144         __ B(&done);
4145       }
4146       break;
4147     }
4148 
4149     case TypeCheckKind::kClassHierarchyCheck: {
4150       ReadBarrierOption read_barrier_option =
4151           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
4152       // /* HeapReference<Class> */ out = obj->klass_
4153       GenerateReferenceLoadTwoRegisters(instruction,
4154                                         out_loc,
4155                                         obj_loc,
4156                                         class_offset,
4157                                         maybe_temp_loc,
4158                                         read_barrier_option);
4159       // Walk over the class hierarchy to find a match.
4160       vixl::aarch64::Label loop, success;
4161       __ Bind(&loop);
4162       __ Cmp(out, cls);
4163       __ B(eq, &success);
4164       // /* HeapReference<Class> */ out = out->super_class_
4165       GenerateReferenceLoadOneRegister(instruction,
4166                                        out_loc,
4167                                        super_offset,
4168                                        maybe_temp_loc,
4169                                        read_barrier_option);
4170       __ Cbnz(out, &loop);
4171       // If `out` is null, we use it for the result, and jump to `done`.
4172       __ B(&done);
4173       __ Bind(&success);
4174       __ Mov(out, 1);
4175       if (zero.IsLinked()) {
4176         __ B(&done);
4177       }
4178       break;
4179     }
4180 
4181     case TypeCheckKind::kArrayObjectCheck: {
4182       ReadBarrierOption read_barrier_option =
4183           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
4184       // /* HeapReference<Class> */ out = obj->klass_
4185       GenerateReferenceLoadTwoRegisters(instruction,
4186                                         out_loc,
4187                                         obj_loc,
4188                                         class_offset,
4189                                         maybe_temp_loc,
4190                                         read_barrier_option);
4191       // Do an exact check.
4192       vixl::aarch64::Label exact_check;
4193       __ Cmp(out, cls);
4194       __ B(eq, &exact_check);
4195       // Otherwise, we need to check that the object's class is a non-primitive array.
4196       // /* HeapReference<Class> */ out = out->component_type_
4197       GenerateReferenceLoadOneRegister(instruction,
4198                                        out_loc,
4199                                        component_offset,
4200                                        maybe_temp_loc,
4201                                        read_barrier_option);
4202       // If `out` is null, we use it for the result, and jump to `done`.
4203       __ Cbz(out, &done);
4204       __ Ldrh(out, HeapOperand(out, primitive_offset));
4205       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4206       __ Cbnz(out, &zero);
4207       __ Bind(&exact_check);
4208       __ Mov(out, 1);
4209       __ B(&done);
4210       break;
4211     }
4212 
4213     case TypeCheckKind::kArrayCheck: {
4214       // No read barrier since the slow path will retry upon failure.
4215       // /* HeapReference<Class> */ out = obj->klass_
4216       GenerateReferenceLoadTwoRegisters(instruction,
4217                                         out_loc,
4218                                         obj_loc,
4219                                         class_offset,
4220                                         maybe_temp_loc,
4221                                         kWithoutReadBarrier);
4222       __ Cmp(out, cls);
4223       DCHECK(locations->OnlyCallsOnSlowPath());
4224       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4225           instruction, /* is_fatal= */ false);
4226       codegen_->AddSlowPath(slow_path);
4227       __ B(ne, slow_path->GetEntryLabel());
4228       __ Mov(out, 1);
4229       if (zero.IsLinked()) {
4230         __ B(&done);
4231       }
4232       break;
4233     }
4234 
4235     case TypeCheckKind::kUnresolvedCheck:
4236     case TypeCheckKind::kInterfaceCheck: {
4237       // Note that we indeed only call on slow path, but we always go
4238       // into the slow path for the unresolved and interface check
4239       // cases.
4240       //
4241       // We cannot directly call the InstanceofNonTrivial runtime
4242       // entry point without resorting to a type checking slow path
4243       // here (i.e. by calling InvokeRuntime directly), as it would
4244       // require to assign fixed registers for the inputs of this
4245       // HInstanceOf instruction (following the runtime calling
4246       // convention), which might be cluttered by the potential first
4247       // read barrier emission at the beginning of this method.
4248       //
4249       // TODO: Introduce a new runtime entry point taking the object
4250       // to test (instead of its class) as argument, and let it deal
4251       // with the read barrier issues. This will let us refactor this
4252       // case of the `switch` code as it was previously (with a direct
4253       // call to the runtime not using a type checking slow path).
4254       // This should also be beneficial for the other cases above.
4255       DCHECK(locations->OnlyCallsOnSlowPath());
4256       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4257           instruction, /* is_fatal= */ false);
4258       codegen_->AddSlowPath(slow_path);
4259       __ B(slow_path->GetEntryLabel());
4260       if (zero.IsLinked()) {
4261         __ B(&done);
4262       }
4263       break;
4264     }
4265 
4266     case TypeCheckKind::kBitstringCheck: {
4267       // /* HeapReference<Class> */ temp = obj->klass_
4268       GenerateReferenceLoadTwoRegisters(instruction,
4269                                         out_loc,
4270                                         obj_loc,
4271                                         class_offset,
4272                                         maybe_temp_loc,
4273                                         kWithoutReadBarrier);
4274 
4275       GenerateBitstringTypeCheckCompare(instruction, out);
4276       __ Cset(out, eq);
4277       if (zero.IsLinked()) {
4278         __ B(&done);
4279       }
4280       break;
4281     }
4282   }
4283 
4284   if (zero.IsLinked()) {
4285     __ Bind(&zero);
4286     __ Mov(out, 0);
4287   }
4288 
4289   if (done.IsLinked()) {
4290     __ Bind(&done);
4291   }
4292 
4293   if (slow_path != nullptr) {
4294     __ Bind(slow_path->GetExitLabel());
4295   }
4296 }
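
// For illustration: ignoring read barriers and the null check, the kClassHierarchyCheck case
// above is a sketch of the following loop (the result is written to `out`):
//
//   ObjPtr<mirror::Class> k = obj->GetClass();
//   out = 0;
//   while (k != nullptr) {
//     if (k == cls) { out = 1; break; }
//     k = k->GetSuperClass();
//   }
//
// kAbstractClassCheck is the same walk except that it starts at the super class, since an
// abstract class can never be the exact class of an object.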
4297 
4298 void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
4299   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4300   LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
4301   LocationSummary* locations =
4302       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
4303   locations->SetInAt(0, Location::RequiresRegister());
4304   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
4305     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
4306     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
4307     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
4308   } else {
4309     locations->SetInAt(1, Location::RequiresRegister());
4310   }
4311   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
4312   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
4313 }
4314 
4315 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
4316   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4317   LocationSummary* locations = instruction->GetLocations();
4318   Location obj_loc = locations->InAt(0);
4319   Register obj = InputRegisterAt(instruction, 0);
4320   Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
4321       ? Register()
4322       : InputRegisterAt(instruction, 1);
4323   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
4324   DCHECK_GE(num_temps, 1u);
4325   DCHECK_LE(num_temps, 3u);
4326   Location temp_loc = locations->GetTemp(0);
4327   Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
4328   Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
4329   Register temp = WRegisterFrom(temp_loc);
4330   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4331   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4332   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4333   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
4334   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
4335   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
4336   const uint32_t object_array_data_offset =
4337       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
4338 
4339   bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
4340   SlowPathCodeARM64* type_check_slow_path =
4341       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
4342           instruction, is_type_check_slow_path_fatal);
4343   codegen_->AddSlowPath(type_check_slow_path);
4344 
4345   vixl::aarch64::Label done;
4346   // Avoid null check if we know obj is not null.
4347   if (instruction->MustDoNullCheck()) {
4348     __ Cbz(obj, &done);
4349   }
4350 
4351   switch (type_check_kind) {
4352     case TypeCheckKind::kExactCheck:
4353     case TypeCheckKind::kArrayCheck: {
4354       // /* HeapReference<Class> */ temp = obj->klass_
4355       GenerateReferenceLoadTwoRegisters(instruction,
4356                                         temp_loc,
4357                                         obj_loc,
4358                                         class_offset,
4359                                         maybe_temp2_loc,
4360                                         kWithoutReadBarrier);
4361 
4362       __ Cmp(temp, cls);
4363       // Jump to slow path for throwing the exception or doing a
4364       // more involved array check.
4365       __ B(ne, type_check_slow_path->GetEntryLabel());
4366       break;
4367     }
4368 
4369     case TypeCheckKind::kAbstractClassCheck: {
4370       // /* HeapReference<Class> */ temp = obj->klass_
4371       GenerateReferenceLoadTwoRegisters(instruction,
4372                                         temp_loc,
4373                                         obj_loc,
4374                                         class_offset,
4375                                         maybe_temp2_loc,
4376                                         kWithoutReadBarrier);
4377 
4378       // If the class is abstract, we eagerly fetch the super class of the
4379       // object to avoid doing a comparison we know will fail.
4380       vixl::aarch64::Label loop;
4381       __ Bind(&loop);
4382       // /* HeapReference<Class> */ temp = temp->super_class_
4383       GenerateReferenceLoadOneRegister(instruction,
4384                                        temp_loc,
4385                                        super_offset,
4386                                        maybe_temp2_loc,
4387                                        kWithoutReadBarrier);
4388 
4389       // If the class reference currently in `temp` is null, jump to the slow path to throw the
4390       // exception.
4391       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4392       // Otherwise, compare classes.
4393       __ Cmp(temp, cls);
4394       __ B(ne, &loop);
4395       break;
4396     }
4397 
4398     case TypeCheckKind::kClassHierarchyCheck: {
4399       // /* HeapReference<Class> */ temp = obj->klass_
4400       GenerateReferenceLoadTwoRegisters(instruction,
4401                                         temp_loc,
4402                                         obj_loc,
4403                                         class_offset,
4404                                         maybe_temp2_loc,
4405                                         kWithoutReadBarrier);
4406 
4407       // Walk over the class hierarchy to find a match.
4408       vixl::aarch64::Label loop;
4409       __ Bind(&loop);
4410       __ Cmp(temp, cls);
4411       __ B(eq, &done);
4412 
4413       // /* HeapReference<Class> */ temp = temp->super_class_
4414       GenerateReferenceLoadOneRegister(instruction,
4415                                        temp_loc,
4416                                        super_offset,
4417                                        maybe_temp2_loc,
4418                                        kWithoutReadBarrier);
4419 
4420       // If the class reference currently in `temp` is not null, jump
4421       // back at the beginning of the loop.
4422       __ Cbnz(temp, &loop);
4423       // Otherwise, jump to the slow path to throw the exception.
4424       __ B(type_check_slow_path->GetEntryLabel());
4425       break;
4426     }
4427 
4428     case TypeCheckKind::kArrayObjectCheck: {
4429       // /* HeapReference<Class> */ temp = obj->klass_
4430       GenerateReferenceLoadTwoRegisters(instruction,
4431                                         temp_loc,
4432                                         obj_loc,
4433                                         class_offset,
4434                                         maybe_temp2_loc,
4435                                         kWithoutReadBarrier);
4436 
4437       // Do an exact check.
4438       __ Cmp(temp, cls);
4439       __ B(eq, &done);
4440 
4441       // Otherwise, we need to check that the object's class is a non-primitive array.
4442       // /* HeapReference<Class> */ temp = temp->component_type_
4443       GenerateReferenceLoadOneRegister(instruction,
4444                                        temp_loc,
4445                                        component_offset,
4446                                        maybe_temp2_loc,
4447                                        kWithoutReadBarrier);
4448 
4449       // If the component type is null, jump to the slow path to throw the exception.
4450       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4451       // Otherwise, the object is indeed an array. Further check that this component type is not a
4452       // primitive type.
4453       __ Ldrh(temp, HeapOperand(temp, primitive_offset));
4454       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4455       __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
4456       break;
4457     }
4458 
4459     case TypeCheckKind::kUnresolvedCheck:
4460       // We always go into the type check slow path for the unresolved check cases.
4461       //
4462       // We cannot directly call the CheckCast runtime entry point
4463       // without resorting to a type checking slow path here (i.e. by
4464       // calling InvokeRuntime directly), as it would require to
4465       // assign fixed registers for the inputs of this HInstanceOf
4466       // instruction (following the runtime calling convention), which
4467       // might be cluttered by the potential first read barrier
4468       // emission at the beginning of this method.
4469       __ B(type_check_slow_path->GetEntryLabel());
4470       break;
4471     case TypeCheckKind::kInterfaceCheck: {
4472       // /* HeapReference<Class> */ temp = obj->klass_
4473       GenerateReferenceLoadTwoRegisters(instruction,
4474                                         temp_loc,
4475                                         obj_loc,
4476                                         class_offset,
4477                                         maybe_temp2_loc,
4478                                         kWithoutReadBarrier);
4479 
4480       // /* HeapReference<Class> */ temp = temp->iftable_
4481       GenerateReferenceLoadTwoRegisters(instruction,
4482                                         temp_loc,
4483                                         temp_loc,
4484                                         iftable_offset,
4485                                         maybe_temp2_loc,
4486                                         kWithoutReadBarrier);
4487       // Iftable is never null.
4488       __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
4489       // Loop through the iftable and check if any class matches.
4490       vixl::aarch64::Label start_loop;
4491       __ Bind(&start_loop);
4492       __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel());
4493       __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset));
4494       GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc));
4495       // Go to next interface.
4496       __ Add(temp, temp, 2 * kHeapReferenceSize);
4497       __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2);
4498       // Compare the classes and continue the loop if they do not match.
4499       __ Cmp(cls, WRegisterFrom(maybe_temp3_loc));
4500       __ B(ne, &start_loop);
4501       break;
4502     }
4503 
4504     case TypeCheckKind::kBitstringCheck: {
4505       // /* HeapReference<Class> */ temp = obj->klass_
4506       GenerateReferenceLoadTwoRegisters(instruction,
4507                                         temp_loc,
4508                                         obj_loc,
4509                                         class_offset,
4510                                         maybe_temp2_loc,
4511                                         kWithoutReadBarrier);
4512 
4513       GenerateBitstringTypeCheckCompare(instruction, temp);
4514       __ B(ne, type_check_slow_path->GetEntryLabel());
4515       break;
4516     }
4517   }
4518   __ Bind(&done);
4519 
4520   __ Bind(type_check_slow_path->GetExitLabel());
4521 }
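
// For illustration: ignoring read barriers, the kInterfaceCheck case above scans the IfTable,
// which stores (interface class, method array) pairs, so the loop advances by two heap
// references per step. A rough sketch:
//
//   iftable   = obj->GetClass()'s IfTable (IfTableOffset);
//   remaining = iftable length;                // counts references, two per interface
//   while (true) {
//     if (remaining == 0) { /* jump to the type check slow path, which throws */ }
//     current = interface class of the current pair;
//     advance to the next pair; remaining -= 2;
//     if (current == cls) break;               // the cast succeeds
//   }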
4522 
4523 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
4524   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4525   locations->SetOut(Location::ConstantLocation(constant));
4526 }
4527 
4528 void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
4529   // Will be generated at use site.
4530 }
4531 
4532 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
4533   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4534   locations->SetOut(Location::ConstantLocation(constant));
4535 }
4536 
4537 void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
4538   // Will be generated at use site.
4539 }
4540 
4541 void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4542   // The trampoline uses the same calling convention as dex calling conventions,
4543   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
4544   // the method_idx.
4545   HandleInvoke(invoke);
4546 }
4547 
4548 void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4549   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
4550   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4551 }
4552 
4553 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
4554   InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
4555   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
4556 }
4557 
4558 void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4559   HandleInvoke(invoke);
4560   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
4561     // We cannot request ip1 as it's blocked by the register allocator.
4562     invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1, Location::Any());
4563   }
4564 }
4565 
4566 void CodeGeneratorARM64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
4567                                                        Register klass) {
4568   DCHECK_EQ(klass.GetCode(), 0u);
4569   // We know the destination of an intrinsic, so no need to record inline
4570   // caches.
4571   if (!instruction->GetLocations()->Intrinsified() &&
4572       GetGraph()->IsCompilingBaseline() &&
4573       !Runtime::Current()->IsAotCompiler()) {
4574     DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
4575     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
4576     DCHECK(info != nullptr);
4577     InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
4578     uint64_t address = reinterpret_cast64<uint64_t>(cache);
4579     vixl::aarch64::Label done;
4580     __ Mov(x8, address);
4581     __ Ldr(x9, MemOperand(x8, InlineCache::ClassesOffset().Int32Value()));
4582     // Fast path for a monomorphic cache.
4583     __ Cmp(klass, x9);
4584     __ B(eq, &done);
4585     InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
4586     __ Bind(&done);
4587   }
4588 }
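
// For illustration: a rough sketch of the baseline inline cache fast path above, assuming
// `cache` is the InlineCache recorded for this dex pc:
//
//   if (receiver_klass != first entry of cache's classes array) {
//     call the kQuickUpdateInlineCache entry point;   // records the new receiver class
//   }
//
// Only the first (monomorphic) entry is checked inline; other updates go through the runtime.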
4589 
4590 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4591   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
4592   LocationSummary* locations = invoke->GetLocations();
4593   Register temp = XRegisterFrom(locations->GetTemp(0));
4594   Location receiver = locations->InAt(0);
4595   Offset class_offset = mirror::Object::ClassOffset();
4596   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4597 
4598   // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4599   if (receiver.IsStackSlot()) {
4600     __ Ldr(temp.W(), StackOperandFrom(receiver));
4601     {
4602       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4603       // /* HeapReference<Class> */ temp = temp->klass_
4604       __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
4605       codegen_->MaybeRecordImplicitNullCheck(invoke);
4606     }
4607   } else {
4608     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4609     // /* HeapReference<Class> */ temp = receiver->klass_
4610     __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
4611     codegen_->MaybeRecordImplicitNullCheck(invoke);
4612   }
4613 
4614   // Instead of simply (possibly) unpoisoning `temp` here, we should
4615   // emit a read barrier for the previous class reference load.
4616   // However this is not required in practice, as this is an
4617   // intermediate/temporary reference and because the current
4618   // concurrent copying collector keeps the from-space memory
4619   // intact/accessible until the end of the marking phase (the
4620   // concurrent copying collector may not do so in the future).
4621   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4622 
4623   // If we're compiling baseline, update the inline cache.
4624   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
4625 
4626   // The register ip1 is required to be used for the hidden argument in
4627   // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
4628   MacroAssembler* masm = GetVIXLAssembler();
4629   UseScratchRegisterScope scratch_scope(masm);
4630   scratch_scope.Exclude(ip1);
4631   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
4632     Location interface_method = locations->InAt(invoke->GetNumberOfArguments() - 1);
4633     if (interface_method.IsStackSlot()) {
4634       __ Ldr(ip1, StackOperandFrom(interface_method));
4635     } else {
4636       __ Mov(ip1, XRegisterFrom(interface_method));
4637     }
4638   // If the load kind is through a runtime call, we will pass the method we
4639   // fetch from the IMT, which will either be a no-op if we don't hit the conflict
4640   // stub, or will make us always go through the trampoline when there is a
4641   // conflict.
4642   } else if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
4643     codegen_->LoadMethod(
4644         invoke->GetHiddenArgumentLoadKind(), Location::RegisterLocation(ip1.GetCode()), invoke);
4645   }
4646 
4647   __ Ldr(temp,
4648       MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
4649   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4650       invoke->GetImtIndex(), kArm64PointerSize));
4651   // temp = temp->GetImtEntryAt(method_offset);
4652   __ Ldr(temp, MemOperand(temp, method_offset));
4653   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
4654     // We pass the method from the IMT in case of a conflict. This will ensure
4655     // we go into the runtime to resolve the actual method.
4656     __ Mov(ip1, temp);
4657   }
4658   // lr = temp->GetEntryPoint();
4659   __ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));
4660 
4661   {
4662     // Ensure the pc position is recorded immediately after the `blr` instruction.
4663     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4664 
4665     // lr();
4666     __ blr(lr);
4667     DCHECK(!codegen_->IsLeafMethod());
4668     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
4669   }
4670 
4671   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4672 }
4673 
4674 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4675   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4676   if (intrinsic.TryDispatch(invoke)) {
4677     return;
4678   }
4679 
4680   HandleInvoke(invoke);
4681 }
4682 
4683 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4684   // Explicit clinit checks triggered by static invokes must have been pruned by
4685   // art::PrepareForRegisterAllocation.
4686   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4687 
4688   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4689   if (intrinsic.TryDispatch(invoke)) {
4690     return;
4691   }
4692 
4693   if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
4694     CriticalNativeCallingConventionVisitorARM64 calling_convention_visitor(
4695         /*for_register_allocation=*/ true);
4696     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
4697   } else {
4698     HandleInvoke(invoke);
4699   }
4700 }
4701 
4702 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4703   if (invoke->GetLocations()->Intrinsified()) {
4704     IntrinsicCodeGeneratorARM64 intrinsic(codegen);
4705     intrinsic.Dispatch(invoke);
4706     return true;
4707   }
4708   return false;
4709 }
4710 
4711 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
4712       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
4713       ArtMethod* method ATTRIBUTE_UNUSED) {
4714   // On ARM64 we support all dispatch types.
4715   return desired_dispatch_info;
4716 }
4717 
4718 void CodeGeneratorARM64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
4719   switch (load_kind) {
4720     case MethodLoadKind::kBootImageLinkTimePcRelative: {
4721       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
4722       // Add ADRP with its PC-relative method patch.
4723       vixl::aarch64::Label* adrp_label =
4724           NewBootImageMethodPatch(invoke->GetResolvedMethodReference());
4725       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4726       // Add ADD with its PC-relative method patch.
4727       vixl::aarch64::Label* add_label =
4728           NewBootImageMethodPatch(invoke->GetResolvedMethodReference(), adrp_label);
4729       EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp));
4730       break;
4731     }
4732     case MethodLoadKind::kBootImageRelRo: {
4733       // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
4734       uint32_t boot_image_offset = GetBootImageOffset(invoke);
4735       LoadBootImageRelRoEntry(WRegisterFrom(temp), boot_image_offset);
4736       break;
4737     }
4738     case MethodLoadKind::kBssEntry: {
4739       // Add ADRP with its PC-relative .bss entry patch.
4740       vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(invoke->GetMethodReference());
4741       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4742       // Add LDR with its PC-relative .bss entry patch.
4743       vixl::aarch64::Label* ldr_label =
4744           NewMethodBssEntryPatch(invoke->GetMethodReference(), adrp_label);
4745       // All aligned loads are implicitly atomic consume operations on ARM64.
4746       EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
4747       break;
4748     }
4749     case MethodLoadKind::kJitDirectAddress: {
4750       // Load method address from literal pool.
4751       __ Ldr(XRegisterFrom(temp),
4752              DeduplicateUint64Literal(reinterpret_cast<uint64_t>(invoke->GetResolvedMethod())));
4753       break;
4754     }
4755     case MethodLoadKind::kRuntimeCall: {
4756       // Test situation, don't do anything.
4757       break;
4758     }
4759     default: {
4760       LOG(FATAL) << "Load kind should have already been handled " << load_kind;
4761       UNREACHABLE();
4762     }
4763   }
4764 }
4765 
4766 void CodeGeneratorARM64::GenerateStaticOrDirectCall(
4767     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
4768   // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
4769   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
4770   switch (invoke->GetMethodLoadKind()) {
4771     case MethodLoadKind::kStringInit: {
4772       uint32_t offset =
4773           GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
4774       // temp = thread->string_init_entrypoint
4775       __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
4776       break;
4777     }
4778     case MethodLoadKind::kRecursive: {
4779       callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
4780       break;
4781     }
4782     case MethodLoadKind::kRuntimeCall: {
4783       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
4784       return;  // No code pointer retrieval; the runtime performs the call directly.
4785     }
4786     case MethodLoadKind::kBootImageLinkTimePcRelative:
4787       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
4788       if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
4789         // Do not materialize the method pointer, load directly the entrypoint.
4790         // Add ADRP with its PC-relative JNI entrypoint patch.
4791         vixl::aarch64::Label* adrp_label =
4792             NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference());
4793         EmitAdrpPlaceholder(adrp_label, lr);
4794         // Add the LDR with its PC-relative JNI entrypoint patch.
4795         vixl::aarch64::Label* add_label =
4796             NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference(), adrp_label);
4797         EmitLdrOffsetPlaceholder(add_label, lr, lr);
4798         break;
4799       }
4800       FALLTHROUGH_INTENDED;
4801     default: {
4802       LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
4803       break;
4804     }
4805   }
4806 
4807   auto call_lr = [&]() {
4808     // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4809     ExactAssemblyScope eas(GetVIXLAssembler(),
4810                            kInstructionSize,
4811                            CodeBufferCheckScope::kExactSize);
4812     // lr()
4813     __ blr(lr);
4814     RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4815   };
4816   switch (invoke->GetCodePtrLocation()) {
4817     case CodePtrLocation::kCallSelf:
4818       {
4819         DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
4820         // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4821         ExactAssemblyScope eas(GetVIXLAssembler(),
4822                                kInstructionSize,
4823                                CodeBufferCheckScope::kExactSize);
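        // Direct recursive call: branch-and-link to this method's own frame entry.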
4824         __ bl(&frame_entry_label_);
4825         RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4826       }
4827       break;
4828     case CodePtrLocation::kCallCriticalNative: {
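      // Set up the native stack frame (if any) and move arguments to their native ABI locations.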
4829       size_t out_frame_size =
4830           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorARM64,
4831                                     kAapcs64StackAlignment,
4832                                     GetCriticalNativeDirectCallFrameSize>(invoke);
4833       if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
4834         call_lr();
4835       } else {
4836         // LR = callee_method->ptr_sized_fields_.data_;  // EntryPointFromJni
4837         MemberOffset offset = ArtMethod::EntryPointFromJniOffset(kArm64PointerSize);
4838         __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
4839         // lr()
4840         call_lr();
4841       }
4842       // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
4843       switch (invoke->GetType()) {
4844         case DataType::Type::kBool:
4845           __ Ubfx(w0, w0, 0, 8);
4846           break;
4847         case DataType::Type::kInt8:
4848           __ Sbfx(w0, w0, 0, 8);
4849           break;
4850         case DataType::Type::kUint16:
4851           __ Ubfx(w0, w0, 0, 16);
4852           break;
4853         case DataType::Type::kInt16:
4854           __ Sbfx(w0, w0, 0, 16);
4855           break;
4856         case DataType::Type::kInt32:
4857         case DataType::Type::kInt64:
4858         case DataType::Type::kFloat32:
4859         case DataType::Type::kFloat64:
4860         case DataType::Type::kVoid:
4861           break;
4862         default:
4863           DCHECK(false) << invoke->GetType();
4864           break;
4865       }
4866       if (out_frame_size != 0u) {
4867         DecreaseFrame(out_frame_size);
4868       }
4869       break;
4870     }
4871     case CodePtrLocation::kCallArtMethod: {
4872       // LR = callee_method->ptr_sized_fields_.entry_point_from_quick_compiled_code_;
4873       MemberOffset offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4874       __ Ldr(lr, MemOperand(XRegisterFrom(callee_method), offset.Int32Value()));
4875       // lr()
4876       call_lr();
4877       break;
4878     }
4879   }
4880 
4881   DCHECK(!IsLeafMethod());
4882 }
4883 
4884 void CodeGeneratorARM64::GenerateVirtualCall(
4885     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
4886   // Use the calling convention instead of the location of the receiver, as
4887   // intrinsics may have put the receiver in a different register. In the intrinsics
4888   // slow path, the arguments have been moved to the right place, so here we are
4889   // guaranteed that the receiver is the first register of the calling convention.
4890   InvokeDexCallingConvention calling_convention;
4891   Register receiver = calling_convention.GetRegisterAt(0);
4892   Register temp = XRegisterFrom(temp_in);
4893   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4894       invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
4895   Offset class_offset = mirror::Object::ClassOffset();
4896   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4897 
4898   DCHECK(receiver.IsRegister());
4899 
4900   {
4901     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4902     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4903     // /* HeapReference<Class> */ temp = receiver->klass_
4904     __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
4905     MaybeRecordImplicitNullCheck(invoke);
4906   }
4907   // Instead of simply (possibly) unpoisoning `temp` here, we should
4908   // emit a read barrier for the previous class reference load. However, this
4909   // is not required in practice, as this is an intermediate/temporary reference
4910   // and because the current concurrent copying collector keeps the from-space memory
4911   // intact/accessible until the end of the marking phase (the
4912   // concurrent copying collector may not in the future).
4913   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4914 
4915   // If we're compiling baseline, update the inline cache.
4916   MaybeGenerateInlineCacheCheck(invoke, temp);
4917 
4918   // temp = temp->GetMethodAt(method_offset);
4919   __ Ldr(temp, MemOperand(temp, method_offset));
4920   // lr = temp->GetEntryPoint();
4921   __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
4922   {
4923     // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4924     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4925     // lr();
4926     __ blr(lr);
4927     RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4928   }
4929 }
4930 
4931 void CodeGeneratorARM64::MoveFromReturnRegister(Location trg, DataType::Type type) {
4932   if (!trg.IsValid()) {
4933     DCHECK(type == DataType::Type::kVoid);
4934     return;
4935   }
4936 
4937   DCHECK_NE(type, DataType::Type::kVoid);
4938 
4939   if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) {
4940     Register trg_reg = RegisterFrom(trg, type);
4941     Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type);
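    // kDiscardForSameWReg elides the move when source and destination are the same W register.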
4942     __ Mov(trg_reg, res_reg, kDiscardForSameWReg);
4943   } else {
4944     VRegister trg_reg = FPRegisterFrom(trg, type);
4945     VRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type);
4946     __ Fmov(trg_reg, res_reg);
4947   }
4948 }
4949 
4950 void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4951   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4952   if (intrinsic.TryDispatch(invoke)) {
4953     return;
4954   }
4955   HandleInvoke(invoke);
4956 }
4957 
4958 void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4959   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4960     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4961     return;
4962   }
4963   codegen_->GenerateInvokePolymorphicCall(invoke);
4964   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4965 }
4966 
4967 void LocationsBuilderARM64::VisitInvokeCustom(HInvokeCustom* invoke) {
4968   HandleInvoke(invoke);
4969 }
4970 
4971 void InstructionCodeGeneratorARM64::VisitInvokeCustom(HInvokeCustom* invoke) {
4972   codegen_->GenerateInvokeCustomCall(invoke);
4973   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4974 }
4975 
4976 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageIntrinsicPatch(
4977     uint32_t intrinsic_data,
4978     vixl::aarch64::Label* adrp_label) {
4979   return NewPcRelativePatch(
4980       /* dex_file= */ nullptr, intrinsic_data, adrp_label, &boot_image_other_patches_);
4981 }
4982 
4983 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch(
4984     uint32_t boot_image_offset,
4985     vixl::aarch64::Label* adrp_label) {
4986   return NewPcRelativePatch(
4987       /* dex_file= */ nullptr, boot_image_offset, adrp_label, &boot_image_other_patches_);
4988 }
4989 
4990 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch(
4991     MethodReference target_method,
4992     vixl::aarch64::Label* adrp_label) {
4993   return NewPcRelativePatch(
4994       target_method.dex_file, target_method.index, adrp_label, &boot_image_method_patches_);
4995 }
4996 
4997 vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch(
4998     MethodReference target_method,
4999     vixl::aarch64::Label* adrp_label) {
5000   return NewPcRelativePatch(
5001       target_method.dex_file, target_method.index, adrp_label, &method_bss_entry_patches_);
5002 }
5003 
5004 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageTypePatch(
5005     const DexFile& dex_file,
5006     dex::TypeIndex type_index,
5007     vixl::aarch64::Label* adrp_label) {
5008   return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &boot_image_type_patches_);
5009 }
5010 
5011 vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
5012     HLoadClass* load_class,
5013     vixl::aarch64::Label* adrp_label) {
5014   const DexFile& dex_file = load_class->GetDexFile();
5015   dex::TypeIndex type_index = load_class->GetTypeIndex();
5016   ArenaDeque<PcRelativePatchInfo>* patches = nullptr;
5017   switch (load_class->GetLoadKind()) {
5018     case HLoadClass::LoadKind::kBssEntry:
5019       patches = &type_bss_entry_patches_;
5020       break;
5021     case HLoadClass::LoadKind::kBssEntryPublic:
5022       patches = &public_type_bss_entry_patches_;
5023       break;
5024     case HLoadClass::LoadKind::kBssEntryPackage:
5025       patches = &package_type_bss_entry_patches_;
5026       break;
5027     default:
5028       LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
5029       UNREACHABLE();
5030   }
5031   return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, patches);
5032 }
5033 
5034 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageStringPatch(
5035     const DexFile& dex_file,
5036     dex::StringIndex string_index,
5037     vixl::aarch64::Label* adrp_label) {
5038   return NewPcRelativePatch(
5039       &dex_file, string_index.index_, adrp_label, &boot_image_string_patches_);
5040 }
5041 
5042 vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch(
5043     const DexFile& dex_file,
5044     dex::StringIndex string_index,
5045     vixl::aarch64::Label* adrp_label) {
5046   return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
5047 }
5048 
5049 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageJniEntrypointPatch(
5050     MethodReference target_method,
5051     vixl::aarch64::Label* adrp_label) {
5052   return NewPcRelativePatch(
5053       target_method.dex_file, target_method.index, adrp_label, &boot_image_jni_entrypoint_patches_);
5054 }
5055 
5056 void CodeGeneratorARM64::EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset) {
5057   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
5058   DCHECK(!GetCompilerOptions().IsJitCompiler());
5059   call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value());
5060   vixl::aarch64::Label* bl_label = &call_entrypoint_patches_.back().label;
5061   __ bind(bl_label);
5062   __ bl(static_cast<int64_t>(0));  // Placeholder, patched at link-time.
5063 }
5064 
5065 void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) {
5066   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
5067   if (GetCompilerOptions().IsJitCompiler()) {
5068     auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
5069     vixl::aarch64::Label* slow_path_entry = &it->second.label;
5070     __ cbnz(mr, slow_path_entry);
5071   } else {
5072     baker_read_barrier_patches_.emplace_back(custom_data);
5073     vixl::aarch64::Label* cbnz_label = &baker_read_barrier_patches_.back().label;
5074     __ bind(cbnz_label);
5075     __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
5076   }
5077 }
5078 
5079 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
5080     const DexFile* dex_file,
5081     uint32_t offset_or_index,
5082     vixl::aarch64::Label* adrp_label,
5083     ArenaDeque<PcRelativePatchInfo>* patches) {
5084   // Add a patch entry and return the label.
5085   patches->emplace_back(dex_file, offset_or_index);
5086   PcRelativePatchInfo* info = &patches->back();
5087   vixl::aarch64::Label* label = &info->label;
5088   // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
5089   info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
5090   return label;
5091 }
5092 
5093 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
5094     uint64_t address) {
5095   return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address));
5096 }
5097 
5098 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
5099     const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) {
5100   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
5101   return jit_string_patches_.GetOrCreate(
5102       StringReference(&dex_file, string_index),
5103       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); });
5104 }
5105 
5106 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral(
5107     const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) {
5108   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
5109   return jit_class_patches_.GetOrCreate(
5110       TypeReference(&dex_file, type_index),
5111       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); });
5112 }
5113 
5114 void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
5115                                              vixl::aarch64::Register reg) {
5116   DCHECK(reg.IsX());
5117   SingleEmissionCheckScope guard(GetVIXLAssembler());
5118   __ Bind(fixup_label);
5119   __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0));
5120 }
5121 
5122 void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
5123                                             vixl::aarch64::Register out,
5124                                             vixl::aarch64::Register base) {
5125   DCHECK(out.IsX());
5126   DCHECK(base.IsX());
5127   SingleEmissionCheckScope guard(GetVIXLAssembler());
5128   __ Bind(fixup_label);
5129   __ add(out, base, Operand(/* offset placeholder */ 0));
5130 }
5131 
5132 void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
5133                                                   vixl::aarch64::Register out,
5134                                                   vixl::aarch64::Register base) {
5135   DCHECK(base.IsX());
5136   SingleEmissionCheckScope guard(GetVIXLAssembler());
5137   __ Bind(fixup_label);
5138   __ ldr(out, MemOperand(base, /* offset placeholder */ 0));
5139 }
5140 
5141 void CodeGeneratorARM64::LoadBootImageRelRoEntry(vixl::aarch64::Register reg,
5142                                                  uint32_t boot_image_offset) {
5143   DCHECK(reg.IsW());
5144   // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
5145   vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset);
5146   EmitAdrpPlaceholder(adrp_label, reg.X());
5147   // Add LDR with its PC-relative .data.bimg.rel.ro patch.
5148   vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label);
5149   EmitLdrOffsetPlaceholder(ldr_label, reg.W(), reg.X());
5150 }
5151 
5152 void CodeGeneratorARM64::LoadBootImageAddress(vixl::aarch64::Register reg,
5153                                               uint32_t boot_image_reference) {
5154   if (GetCompilerOptions().IsBootImage()) {
5155     // Add ADRP with its PC-relative intrinsic reference patch.
5156     vixl::aarch64::Label* adrp_label = NewBootImageIntrinsicPatch(boot_image_reference);
5157     EmitAdrpPlaceholder(adrp_label, reg.X());
5158     // Add ADD with its PC-relative intrinsic reference patch.
5159     vixl::aarch64::Label* add_label = NewBootImageIntrinsicPatch(boot_image_reference, adrp_label);
5160     EmitAddPlaceholder(add_label, reg.X(), reg.X());
5161   } else if (GetCompilerOptions().GetCompilePic()) {
5162     LoadBootImageRelRoEntry(reg, boot_image_reference);
5163   } else {
5164     DCHECK(GetCompilerOptions().IsJitCompiler());
5165     gc::Heap* heap = Runtime::Current()->GetHeap();
5166     DCHECK(!heap->GetBootImageSpaces().empty());
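    // The boot image is mapped in the low 4GiB, so the address fits in a 32-bit literal.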
5167     const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
5168     __ Ldr(reg.W(), DeduplicateBootImageAddressLiteral(reinterpret_cast<uintptr_t>(address)));
5169   }
5170 }
5171 
5172 void CodeGeneratorARM64::LoadTypeForBootImageIntrinsic(vixl::aarch64::Register reg,
5173                                                        TypeReference target_type) {
5174   // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
5175   DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5176   // Add ADRP with its PC-relative type patch.
5177   vixl::aarch64::Label* adrp_label =
5178       NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex());
5179   EmitAdrpPlaceholder(adrp_label, reg.X());
5180   // Add ADD with its PC-relative type patch.
5181   vixl::aarch64::Label* add_label =
5182       NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex(), adrp_label);
5183   EmitAddPlaceholder(add_label, reg.X(), reg.X());
5184 }
5185 
5186 void CodeGeneratorARM64::LoadIntrinsicDeclaringClass(vixl::aarch64::Register reg, HInvoke* invoke) {
5187   DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
5188   if (GetCompilerOptions().IsBootImage()) {
5189     MethodReference target_method = invoke->GetResolvedMethodReference();
5190     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
5191     LoadTypeForBootImageIntrinsic(reg, TypeReference(target_method.dex_file, type_idx));
5192   } else {
5193     uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
5194     LoadBootImageAddress(reg, boot_image_offset);
5195   }
5196 }
5197 
5198 void CodeGeneratorARM64::LoadClassRootForIntrinsic(vixl::aarch64::Register reg,
5199                                                    ClassRoot class_root) {
5200   if (GetCompilerOptions().IsBootImage()) {
5201     ScopedObjectAccess soa(Thread::Current());
5202     ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
5203     TypeReference target_type(&klass->GetDexFile(), klass->GetDexTypeIndex());
5204     LoadTypeForBootImageIntrinsic(reg, target_type);
5205   } else {
5206     uint32_t boot_image_offset = GetBootImageOffset(class_root);
5207     LoadBootImageAddress(reg, boot_image_offset);
5208   }
5209 }
5210 
5211 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
5212 inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
5213     const ArenaDeque<PcRelativePatchInfo>& infos,
5214     ArenaVector<linker::LinkerPatch>* linker_patches) {
5215   for (const PcRelativePatchInfo& info : infos) {
5216     linker_patches->push_back(Factory(info.label.GetLocation(),
5217                                       info.target_dex_file,
5218                                       info.pc_insn_label->GetLocation(),
5219                                       info.offset_or_index));
5220   }
5221 }
5222 
5223 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
5224 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
5225                                      const DexFile* target_dex_file,
5226                                      uint32_t pc_insn_offset,
5227                                      uint32_t boot_image_offset) {
5228   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
5229   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
5230 }
5231 
5232 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
5233   DCHECK(linker_patches->empty());
5234   size_t size =
5235       boot_image_method_patches_.size() +
5236       method_bss_entry_patches_.size() +
5237       boot_image_type_patches_.size() +
5238       type_bss_entry_patches_.size() +
5239       public_type_bss_entry_patches_.size() +
5240       package_type_bss_entry_patches_.size() +
5241       boot_image_string_patches_.size() +
5242       string_bss_entry_patches_.size() +
5243       boot_image_jni_entrypoint_patches_.size() +
5244       boot_image_other_patches_.size() +
5245       call_entrypoint_patches_.size() +
5246       baker_read_barrier_patches_.size();
5247   linker_patches->reserve(size);
5248   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
5249     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
5250         boot_image_method_patches_, linker_patches);
5251     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
5252         boot_image_type_patches_, linker_patches);
5253     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
5254         boot_image_string_patches_, linker_patches);
5255   } else {
5256     DCHECK(boot_image_method_patches_.empty());
5257     DCHECK(boot_image_type_patches_.empty());
5258     DCHECK(boot_image_string_patches_.empty());
5259   }
5260   if (GetCompilerOptions().IsBootImage()) {
5261     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
5262         boot_image_other_patches_, linker_patches);
5263   } else {
5264     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
5265         boot_image_other_patches_, linker_patches);
5266   }
5267   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
5268       method_bss_entry_patches_, linker_patches);
5269   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
5270       type_bss_entry_patches_, linker_patches);
5271   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
5272       public_type_bss_entry_patches_, linker_patches);
5273   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
5274       package_type_bss_entry_patches_, linker_patches);
5275   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
5276       string_bss_entry_patches_, linker_patches);
5277   EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
5278       boot_image_jni_entrypoint_patches_, linker_patches);
5279   for (const PatchInfo<vixl::aarch64::Label>& info : call_entrypoint_patches_) {
5280     DCHECK(info.target_dex_file == nullptr);
5281     linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(
5282         info.label.GetLocation(), info.offset_or_index));
5283   }
5284   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
5285     linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
5286         info.label.GetLocation(), info.custom_data));
5287   }
5288   DCHECK_EQ(size, linker_patches->size());
5289 }
5290 
5291 bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const {
5292   return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint ||
5293          patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
5294          patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
5295 }
5296 
5297 void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch,
5298                                        /*out*/ ArenaVector<uint8_t>* code,
5299                                        /*out*/ std::string* debug_name) {
5300   Arm64Assembler assembler(GetGraph()->GetAllocator());
5301   switch (patch.GetType()) {
5302     case linker::LinkerPatch::Type::kCallRelative: {
5303       // The thunk just uses the entry point in the ArtMethod. This works even for calls
5304       // to the generic JNI and interpreter trampolines.
5305       Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
5306           kArm64PointerSize).Int32Value());
5307       assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
5308       if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
5309         *debug_name = "MethodCallThunk";
5310       }
5311       break;
5312     }
5313     case linker::LinkerPatch::Type::kCallEntrypoint: {
5314       Offset offset(patch.EntrypointOffset());
5315       assembler.JumpTo(ManagedRegister(arm64::TR), offset, ManagedRegister(arm64::IP0));
5316       if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
5317         *debug_name = "EntrypointCallThunk_" + std::to_string(offset.Uint32Value());
5318       }
5319       break;
5320     }
5321     case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
5322       DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
5323       CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
5324       break;
5325     }
5326     default:
5327       LOG(FATAL) << "Unexpected patch type " << patch.GetType();
5328       UNREACHABLE();
5329   }
5330 
5331   // Ensure we emit the literal pool if any.
5332   assembler.FinalizeCode();
5333   code->resize(assembler.CodeSize());
5334   MemoryRegion code_region(code->data(), code->size());
5335   assembler.FinalizeInstructions(code_region);
5336 }
5337 
5338 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) {
5339   return uint32_literals_.GetOrCreate(
5340       value,
5341       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
5342 }
5343 
5344 vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
5345   return uint64_literals_.GetOrCreate(
5346       value,
5347       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
5348 }
5349 
5350 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
5351   // Explicit clinit checks triggered by static invokes must have been pruned by
5352   // art::PrepareForRegisterAllocation.
5353   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
5354 
5355   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
5356     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5357     return;
5358   }
5359 
5360   LocationSummary* locations = invoke->GetLocations();
5361   codegen_->GenerateStaticOrDirectCall(
5362       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
5363 
5364   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5365 }
5366 
5367 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
5368   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
5369     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5370     return;
5371   }
5372 
5373   {
5374     // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
5375     // are no pools emitted.
5376     EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
5377     codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
5378     DCHECK(!codegen_->IsLeafMethod());
5379   }
5380 
5381   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5382 }
5383 
5384 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
5385     HLoadClass::LoadKind desired_class_load_kind) {
5386   switch (desired_class_load_kind) {
5387     case HLoadClass::LoadKind::kInvalid:
5388       LOG(FATAL) << "UNREACHABLE";
5389       UNREACHABLE();
5390     case HLoadClass::LoadKind::kReferrersClass:
5391       break;
5392     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5393     case HLoadClass::LoadKind::kBootImageRelRo:
5394     case HLoadClass::LoadKind::kBssEntry:
5395     case HLoadClass::LoadKind::kBssEntryPublic:
5396     case HLoadClass::LoadKind::kBssEntryPackage:
5397       DCHECK(!GetCompilerOptions().IsJitCompiler());
5398       break;
5399     case HLoadClass::LoadKind::kJitBootImageAddress:
5400     case HLoadClass::LoadKind::kJitTableAddress:
5401       DCHECK(GetCompilerOptions().IsJitCompiler());
5402       break;
5403     case HLoadClass::LoadKind::kRuntimeCall:
5404       break;
5405   }
5406   return desired_class_load_kind;
5407 }
5408 
5409 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
5410   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5411   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5412     InvokeRuntimeCallingConvention calling_convention;
5413     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
5414         cls,
5415         LocationFrom(calling_convention.GetRegisterAt(0)),
5416         LocationFrom(vixl::aarch64::x0));
5417     DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0));
5418     return;
5419   }
5420   DCHECK_EQ(cls->NeedsAccessCheck(),
5421             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
5422                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
5423 
5424   const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage();
5425   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
5426       ? LocationSummary::kCallOnSlowPath
5427       : LocationSummary::kNoCall;
5428   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
5429   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
5430     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5431   }
5432 
5433   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
5434     locations->SetInAt(0, Location::RequiresRegister());
5435   }
5436   locations->SetOut(Location::RequiresRegister());
5437   if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
5438     if (!gUseReadBarrier || kUseBakerReadBarrier) {
5439       // Rely on the type resolution or initialization and marking to save everything we need.
5440       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
5441     } else {
5442       // For non-Baker read barrier we have a temp-clobbering call.
5443     }
5444   }
5445 }
5446 
5447 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5448 // move.
5449 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
5450   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5451   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5452     codegen_->GenerateLoadClassRuntimeCall(cls);
5453     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5454     return;
5455   }
5456   DCHECK_EQ(cls->NeedsAccessCheck(),
5457             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
5458                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
5459 
5460   Location out_loc = cls->GetLocations()->Out();
5461   Register out = OutputRegister(cls);
5462 
5463   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
5464       ? kWithoutReadBarrier
5465       : gCompilerReadBarrierOption;
5466   bool generate_null_check = false;
5467   switch (load_kind) {
5468     case HLoadClass::LoadKind::kReferrersClass: {
5469       DCHECK(!cls->CanCallRuntime());
5470       DCHECK(!cls->MustGenerateClinitCheck());
5471       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5472       Register current_method = InputRegisterAt(cls, 0);
5473       codegen_->GenerateGcRootFieldLoad(cls,
5474                                         out_loc,
5475                                         current_method,
5476                                         ArtMethod::DeclaringClassOffset().Int32Value(),
5477                                         /* fixup_label= */ nullptr,
5478                                         read_barrier_option);
5479       break;
5480     }
5481     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
5482       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
5483              codegen_->GetCompilerOptions().IsBootImageExtension());
5484       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5485       // Add ADRP with its PC-relative type patch.
5486       const DexFile& dex_file = cls->GetDexFile();
5487       dex::TypeIndex type_index = cls->GetTypeIndex();
5488       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index);
5489       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5490       // Add ADD with its PC-relative type patch.
5491       vixl::aarch64::Label* add_label =
5492           codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label);
5493       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
5494       break;
5495     }
5496     case HLoadClass::LoadKind::kBootImageRelRo: {
5497       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5498       uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(cls);
5499       codegen_->LoadBootImageRelRoEntry(out.W(), boot_image_offset);
5500       break;
5501     }
5502     case HLoadClass::LoadKind::kBssEntry:
5503     case HLoadClass::LoadKind::kBssEntryPublic:
5504     case HLoadClass::LoadKind::kBssEntryPackage: {
5505       // Add ADRP with its PC-relative Class .bss entry patch.
5506       vixl::aarch64::Register temp = XRegisterFrom(out_loc);
5507       vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(cls);
5508       codegen_->EmitAdrpPlaceholder(adrp_label, temp);
5509       // Add LDR with its PC-relative Class .bss entry patch.
5510       vixl::aarch64::Label* ldr_label = codegen_->NewBssEntryTypePatch(cls, adrp_label);
5511       // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
5512       // All aligned loads are implicitly atomic consume operations on ARM64.
5513       codegen_->GenerateGcRootFieldLoad(cls,
5514                                         out_loc,
5515                                         temp,
5516                                         /* offset placeholder */ 0u,
5517                                         ldr_label,
5518                                         read_barrier_option);
5519       generate_null_check = true;
5520       break;
5521     }
5522     case HLoadClass::LoadKind::kJitBootImageAddress: {
5523       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5524       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
5525       DCHECK_NE(address, 0u);
5526       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
5527       break;
5528     }
5529     case HLoadClass::LoadKind::kJitTableAddress: {
5530       __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
5531                                                        cls->GetTypeIndex(),
5532                                                        cls->GetClass()));
5533       codegen_->GenerateGcRootFieldLoad(cls,
5534                                         out_loc,
5535                                         out.X(),
5536                                         /* offset= */ 0,
5537                                         /* fixup_label= */ nullptr,
5538                                         read_barrier_option);
5539       break;
5540     }
5541     case HLoadClass::LoadKind::kRuntimeCall:
5542     case HLoadClass::LoadKind::kInvalid:
5543       LOG(FATAL) << "UNREACHABLE";
5544       UNREACHABLE();
5545   }
5546 
5547   bool do_clinit = cls->MustGenerateClinitCheck();
5548   if (generate_null_check || do_clinit) {
5549     DCHECK(cls->CanCallRuntime());
5550     SlowPathCodeARM64* slow_path =
5551         new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(cls, cls);
5552     codegen_->AddSlowPath(slow_path);
5553     if (generate_null_check) {
5554       __ Cbz(out, slow_path->GetEntryLabel());
5555     }
5556     if (cls->MustGenerateClinitCheck()) {
5557       GenerateClassInitializationCheck(slow_path, out);
5558     } else {
5559       __ Bind(slow_path->GetExitLabel());
5560     }
5561     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5562   }
5563 }
5564 
5565 void LocationsBuilderARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
5566   InvokeRuntimeCallingConvention calling_convention;
5567   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
5568   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
5569 }
5570 
5571 void InstructionCodeGeneratorARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
5572   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
5573 }
5574 
5575 void LocationsBuilderARM64::VisitLoadMethodType(HLoadMethodType* load) {
5576   InvokeRuntimeCallingConvention calling_convention;
5577   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
5578   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
5579 }
5580 
5581 void InstructionCodeGeneratorARM64::VisitLoadMethodType(HLoadMethodType* load) {
5582   codegen_->GenerateLoadMethodTypeRuntimeCall(load);
5583 }
5584 
5585 static MemOperand GetExceptionTlsAddress() {
5586   return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
5587 }
5588 
5589 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
5590   LocationSummary* locations =
5591       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
5592   locations->SetOut(Location::RequiresRegister());
5593 }
5594 
5595 void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) {
5596   __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress());
5597 }
5598 
5599 void LocationsBuilderARM64::VisitClearException(HClearException* clear) {
5600   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
5601 }
5602 
5603 void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
5604   __ Str(wzr, GetExceptionTlsAddress());
5605 }
5606 
5607 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
5608     HLoadString::LoadKind desired_string_load_kind) {
5609   switch (desired_string_load_kind) {
5610     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5611     case HLoadString::LoadKind::kBootImageRelRo:
5612     case HLoadString::LoadKind::kBssEntry:
5613       DCHECK(!GetCompilerOptions().IsJitCompiler());
5614       break;
5615     case HLoadString::LoadKind::kJitBootImageAddress:
5616     case HLoadString::LoadKind::kJitTableAddress:
5617       DCHECK(GetCompilerOptions().IsJitCompiler());
5618       break;
5619     case HLoadString::LoadKind::kRuntimeCall:
5620       break;
5621   }
5622   return desired_string_load_kind;
5623 }
5624 
5625 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
5626   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
5627   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
5628   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
5629     InvokeRuntimeCallingConvention calling_convention;
5630     locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
5631   } else {
5632     locations->SetOut(Location::RequiresRegister());
5633     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
5634       if (!gUseReadBarrier || kUseBakerReadBarrier) {
5635         // Rely on the pResolveString and marking to save everything we need.
5636         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
5637       } else {
5638         // For non-Baker read barrier we have a temp-clobbering call.
5639       }
5640     }
5641   }
5642 }
5643 
5644 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5645 // move.
5646 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
5647   Register out = OutputRegister(load);
5648   Location out_loc = load->GetLocations()->Out();
5649 
5650   switch (load->GetLoadKind()) {
5651     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5652       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
5653              codegen_->GetCompilerOptions().IsBootImageExtension());
5654       // Add ADRP with its PC-relative String patch.
5655       const DexFile& dex_file = load->GetDexFile();
5656       const dex::StringIndex string_index = load->GetStringIndex();
5657       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index);
5658       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5659       // Add ADD with its PC-relative String patch.
5660       vixl::aarch64::Label* add_label =
5661           codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label);
5662       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
5663       return;
5664     }
5665     case HLoadString::LoadKind::kBootImageRelRo: {
5666       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5667       uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(load);
5668       codegen_->LoadBootImageRelRoEntry(out.W(), boot_image_offset);
5669       return;
5670     }
5671     case HLoadString::LoadKind::kBssEntry: {
5672       // Add ADRP with its PC-relative String .bss entry patch.
5673       const DexFile& dex_file = load->GetDexFile();
5674       const dex::StringIndex string_index = load->GetStringIndex();
5675       Register temp = XRegisterFrom(out_loc);
5676       vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index);
5677       codegen_->EmitAdrpPlaceholder(adrp_label, temp);
5678       // Add LDR with its PC-relative String .bss entry patch.
5679       vixl::aarch64::Label* ldr_label =
5680           codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label);
5681       // /* GcRoot<mirror::String> */ out = *(base_address + offset)  /* PC-relative */
5682       // All aligned loads are implicitly atomic consume operations on ARM64.
5683       codegen_->GenerateGcRootFieldLoad(load,
5684                                         out_loc,
5685                                         temp,
5686                                         /* offset placeholder */ 0u,
5687                                         ldr_label,
5688                                         gCompilerReadBarrierOption);
5689       SlowPathCodeARM64* slow_path =
5690           new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load);
5691       codegen_->AddSlowPath(slow_path);
5692       __ Cbz(out.X(), slow_path->GetEntryLabel());
5693       __ Bind(slow_path->GetExitLabel());
5694       codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5695       return;
5696     }
5697     case HLoadString::LoadKind::kJitBootImageAddress: {
5698       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
5699       DCHECK_NE(address, 0u);
5700       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
5701       return;
5702     }
5703     case HLoadString::LoadKind::kJitTableAddress: {
5704       __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
5705                                                         load->GetStringIndex(),
5706                                                         load->GetString()));
5707       codegen_->GenerateGcRootFieldLoad(load,
5708                                         out_loc,
5709                                         out.X(),
5710                                         /* offset= */ 0,
5711                                         /* fixup_label= */ nullptr,
5712                                         gCompilerReadBarrierOption);
5713       return;
5714     }
5715     default:
5716       break;
5717   }
5718 
5719   // TODO: Re-add the compiler code to do string dex cache lookup again.
5720   InvokeRuntimeCallingConvention calling_convention;
5721   DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
5722   __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
5723   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
5724   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
5725   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5726 }
5727 
5728 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
5729   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
5730   locations->SetOut(Location::ConstantLocation(constant));
5731 }
5732 
5733 void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
5734   // Will be generated at use site.
5735 }
5736 
5737 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5738   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5739       instruction, LocationSummary::kCallOnMainOnly);
5740   InvokeRuntimeCallingConvention calling_convention;
5741   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5742 }
5743 
5744 void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5745   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
5746                           instruction,
5747                           instruction->GetDexPc());
5748   if (instruction->IsEnter()) {
5749     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
5750   } else {
5751     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
5752   }
5753   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5754 }
5755 
5756 void LocationsBuilderARM64::VisitMul(HMul* mul) {
5757   LocationSummary* locations =
5758       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
5759   switch (mul->GetResultType()) {
5760     case DataType::Type::kInt32:
5761     case DataType::Type::kInt64:
5762       locations->SetInAt(0, Location::RequiresRegister());
5763       locations->SetInAt(1, Location::RequiresRegister());
5764       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5765       break;
5766 
5767     case DataType::Type::kFloat32:
5768     case DataType::Type::kFloat64:
5769       locations->SetInAt(0, Location::RequiresFpuRegister());
5770       locations->SetInAt(1, Location::RequiresFpuRegister());
5771       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5772       break;
5773 
5774     default:
5775       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5776   }
5777 }
5778 
5779 void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) {
5780   switch (mul->GetResultType()) {
5781     case DataType::Type::kInt32:
5782     case DataType::Type::kInt64:
5783       __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
5784       break;
5785 
5786     case DataType::Type::kFloat32:
5787     case DataType::Type::kFloat64:
5788       __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1));
5789       break;
5790 
5791     default:
5792       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5793   }
5794 }
5795 
5796 void LocationsBuilderARM64::VisitNeg(HNeg* neg) {
5797   LocationSummary* locations =
5798       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
5799   switch (neg->GetResultType()) {
5800     case DataType::Type::kInt32:
5801     case DataType::Type::kInt64:
5802       locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg));
5803       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5804       break;
5805 
5806     case DataType::Type::kFloat32:
5807     case DataType::Type::kFloat64:
5808       locations->SetInAt(0, Location::RequiresFpuRegister());
5809       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5810       break;
5811 
5812     default:
5813       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5814   }
5815 }
5816 
5817 void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) {
5818   switch (neg->GetResultType()) {
5819     case DataType::Type::kInt32:
5820     case DataType::Type::kInt64:
5821       __ Neg(OutputRegister(neg), InputOperandAt(neg, 0));
5822       break;
5823 
5824     case DataType::Type::kFloat32:
5825     case DataType::Type::kFloat64:
5826       __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0));
5827       break;
5828 
5829     default:
5830       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5831   }
5832 }
5833 
5834 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
5835   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5836       instruction, LocationSummary::kCallOnMainOnly);
5837   InvokeRuntimeCallingConvention calling_convention;
5838   locations->SetOut(LocationFrom(x0));
5839   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5840   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5841 }
5842 
5843 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
5844   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5845   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5846   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5847   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5848   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5849 }
5850 
5851 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
5852   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5853       instruction, LocationSummary::kCallOnMainOnly);
5854   InvokeRuntimeCallingConvention calling_convention;
5855   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5856   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
5857 }
5858 
5859 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
5860   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5861   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5862   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5863 }
5864 
5865 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
5866   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5867   locations->SetInAt(0, Location::RequiresRegister());
5868   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5869 }
5870 
5871 void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
5872   switch (instruction->GetResultType()) {
5873     case DataType::Type::kInt32:
5874     case DataType::Type::kInt64:
5875       __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0));
5876       break;
5877 
5878     default:
5879       LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
5880   }
5881 }
5882 
5883 void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
5884   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5885   locations->SetInAt(0, Location::RequiresRegister());
5886   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5887 }
5888 
5889 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
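  // For a 0/1 boolean input, XOR with 1 flips the value, i.e. computes !input.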
5890   __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
5891 }
5892 
5893 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
5894   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5895   locations->SetInAt(0, Location::RequiresRegister());
5896 }
5897 
5898 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5899   if (CanMoveNullCheckToUser(instruction)) {
5900     return;
5901   }
5902   {
5903     // Ensure that between load and RecordPcInfo there are no pools emitted.
5904     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5905     Location obj = instruction->GetLocations()->InAt(0);
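    // Loading into wzr discards the value: the load exists only so that a null `obj` faults
    // here, letting the runtime's fault handler turn the fault into a NullPointerException
    // at the PC recorded by RecordPcInfo below.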
5906     __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
5907     RecordPcInfo(instruction, instruction->GetDexPc());
5908   }
5909 }
5910 
5911 void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5912   SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathARM64(instruction);
5913   AddSlowPath(slow_path);
5914 
5915   LocationSummary* locations = instruction->GetLocations();
5916   Location obj = locations->InAt(0);
5917 
5918   __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
5919 }
5920 
5921 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
5922   codegen_->GenerateNullCheck(instruction);
5923 }
5924 
5925 void LocationsBuilderARM64::VisitOr(HOr* instruction) {
5926   HandleBinaryOp(instruction);
5927 }
5928 
5929 void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) {
5930   HandleBinaryOp(instruction);
5931 }
5932 
5933 void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5934   LOG(FATAL) << "Unreachable";
5935 }
5936 
5937 void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) {
5938   if (instruction->GetNext()->IsSuspendCheck() &&
5939       instruction->GetBlock()->GetLoopInformation() != nullptr) {
5940     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5941     // The back edge will generate the suspend check.
5942     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5943   }
5944 
5945   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5946 }
5947 
5948 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
5949   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5950   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5951   if (location.IsStackSlot()) {
5952     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5953   } else if (location.IsDoubleStackSlot()) {
5954     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5955   }
5956   locations->SetOut(location);
5957 }
5958 
5959 void InstructionCodeGeneratorARM64::VisitParameterValue(
5960     HParameterValue* instruction ATTRIBUTE_UNUSED) {
5961   // Nothing to do, the parameter is already at its location.
5962 }
5963 
5964 void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) {
5965   LocationSummary* locations =
5966       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5967   locations->SetOut(LocationFrom(kArtMethodRegister));
5968 }
5969 
5970 void InstructionCodeGeneratorARM64::VisitCurrentMethod(
5971     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
5972   // Nothing to do, the method is already at its location.
5973 }
5974 
5975 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
5976   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5977   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5978     locations->SetInAt(i, Location::Any());
5979   }
5980   locations->SetOut(Location::Any());
5981 }
5982 
5983 void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
5984   LOG(FATAL) << "Unreachable";
5985 }
5986 
5987 void LocationsBuilderARM64::VisitRem(HRem* rem) {
5988   DataType::Type type = rem->GetResultType();
5989   LocationSummary::CallKind call_kind =
5990       DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
5991                                            : LocationSummary::kNoCall;
5992   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
5993 
5994   switch (type) {
5995     case DataType::Type::kInt32:
5996     case DataType::Type::kInt64:
5997       locations->SetInAt(0, Location::RequiresRegister());
5998       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
5999       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6000       break;
6001 
6002     case DataType::Type::kFloat32:
6003     case DataType::Type::kFloat64: {
6004       InvokeRuntimeCallingConvention calling_convention;
6005       locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
6006       locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
6007       locations->SetOut(calling_convention.GetReturnLocation(type));
6008 
6009       break;
6010     }
6011 
6012     default:
6013       LOG(FATAL) << "Unexpected rem type " << type;
6014   }
6015 }
6016 
6017 void InstructionCodeGeneratorARM64::GenerateIntRemForPower2Denom(HRem *instruction) {
6018   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
6019   uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
6020   DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;
6021 
6022   Register out = OutputRegister(instruction);
6023   Register dividend = InputRegisterAt(instruction, 0);
6024 
6025   if (HasNonNegativeOrMinIntInputAt(instruction, 0)) {
6026     // No need to adjust the result for non-negative dividends or the INT32_MIN/INT64_MIN dividends.
6027     // NOTE: The generated code for HRem correctly works for the INT32_MIN/INT64_MIN dividends.
6028     // INT*_MIN % imm must be 0 for any imm that is a power of 2. 'and' works only with bits
6029     // 0..30 (Int32 case)/0..62 (Int64 case) of a dividend. For INT32_MIN/INT64_MIN they are zeros.
6030     // So 'and' always produces zero.
6031     __ And(out, dividend, abs_imm - 1);
6032   } else {
6033     if (abs_imm == 2) {
6034       __ Cmp(dividend, 0);
6035       __ And(out, dividend, 1);
6036       __ Csneg(out, out, out, ge);
6037     } else {
6038       UseScratchRegisterScope temps(GetVIXLAssembler());
6039       Register temp = temps.AcquireSameSizeAs(out);
6040 
6041       __ Negs(temp, dividend);
6042       __ And(out, dividend, abs_imm - 1);
6043       __ And(temp, temp, abs_imm - 1);
6044       __ Csneg(out, out, temp, mi);
6045     }
6046   }
6047 }
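
// Editorial sketch (not part of the ART sources): the value the Negs/And/Csneg sequence
// above computes for a power-of-two divisor, written in plain C++. `abs_imm` stands for
// the absolute value of the divisor; negation is done in unsigned arithmetic so the
// sketch stays well defined even for INT64_MIN.
static int64_t RemPowerOfTwoSketch(int64_t dividend, uint64_t abs_imm) {
  uint64_t mask = abs_imm - 1u;
  uint64_t rem = static_cast<uint64_t>(dividend) & mask;             // And out, dividend, #mask
  uint64_t neg_rem = (0u - static_cast<uint64_t>(dividend)) & mask;  // Negs temp, dividend; And temp, temp, #mask
  // Csneg(out, out, temp, mi): keep `rem` for positive dividends, otherwise negate `neg_rem`.
  return (dividend > 0) ? static_cast<int64_t>(rem) : -static_cast<int64_t>(neg_rem);
}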
6048 
6049 void InstructionCodeGeneratorARM64::GenerateIntRemForConstDenom(HRem *instruction) {
6050   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
6051 
6052   if (imm == 0) {
6053     // Do not generate anything.
6054     // DivZeroCheck would prevent any code from being executed.
6055     return;
6056   }
6057 
6058   if (IsPowerOfTwo(AbsOrMin(imm))) {
6059     // Cases imm == -1 or imm == 1 are handled in constant folding by
6060     // InstructionWithAbsorbingInputSimplifier.
6061     // If these cases have survived till code generation, they are handled in
6062     // GenerateIntRemForPower2Denom because -1 and 1 are powers of 2 (2^0).
6063     // The correct code is generated for them, just with more instructions.
6064     GenerateIntRemForPower2Denom(instruction);
6065   } else {
6066     DCHECK(imm < -2 || imm > 2) << imm;
6067     GenerateDivRemWithAnyConstant(instruction, imm);
6068   }
6069 }
6070 
6071 void InstructionCodeGeneratorARM64::GenerateIntRem(HRem* instruction) {
6072   DCHECK(DataType::IsIntOrLongType(instruction->GetResultType()))
6073          << instruction->GetResultType();
6074 
6075   if (instruction->GetLocations()->InAt(1).IsConstant()) {
6076     GenerateIntRemForConstDenom(instruction);
6077   } else {
6078     Register out = OutputRegister(instruction);
6079     Register dividend = InputRegisterAt(instruction, 0);
6080     Register divisor = InputRegisterAt(instruction, 1);
6081     UseScratchRegisterScope temps(GetVIXLAssembler());
6082     Register temp = temps.AcquireSameSizeAs(out);
6083     __ Sdiv(temp, dividend, divisor);
6084     __ Msub(out, temp, divisor, dividend);
6085   }
6086 }
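
// Editorial sketch (not part of the ART sources): the non-constant-divisor path above
// relies on the identity rem = dividend - (dividend / divisor) * divisor, with the
// truncating division done by Sdiv and the multiply-subtract folded into a single Msub.
// A zero divisor is excluded by the earlier DivZeroCheck.
static int64_t RemViaSdivMsubSketch(int64_t dividend, int64_t divisor) {
  int64_t quotient = dividend / divisor;  // Sdiv temp, dividend, divisor
  return dividend - quotient * divisor;   // Msub out, temp, divisor, dividend
}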
6087 
6088 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
6089   DataType::Type type = rem->GetResultType();
6090 
6091   switch (type) {
6092     case DataType::Type::kInt32:
6093     case DataType::Type::kInt64: {
6094       GenerateIntRem(rem);
6095       break;
6096     }
6097 
6098     case DataType::Type::kFloat32:
6099     case DataType::Type::kFloat64: {
6100       QuickEntrypointEnum entrypoint =
6101           (type == DataType::Type::kFloat32) ? kQuickFmodf : kQuickFmod;
6102       codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc());
6103       if (type == DataType::Type::kFloat32) {
6104         CheckEntrypointTypes<kQuickFmodf, float, float, float>();
6105       } else {
6106         CheckEntrypointTypes<kQuickFmod, double, double, double>();
6107       }
6108       break;
6109     }
6110 
6111     default:
6112       LOG(FATAL) << "Unexpected rem type " << type;
6113       UNREACHABLE();
6114   }
6115 }
6116 
6117 void LocationsBuilderARM64::VisitMin(HMin* min) {
6118   HandleBinaryOp(min);
6119 }
6120 
6121 void InstructionCodeGeneratorARM64::VisitMin(HMin* min) {
6122   HandleBinaryOp(min);
6123 }
6124 
6125 void LocationsBuilderARM64::VisitMax(HMax* max) {
6126   HandleBinaryOp(max);
6127 }
6128 
6129 void InstructionCodeGeneratorARM64::VisitMax(HMax* max) {
6130   HandleBinaryOp(max);
6131 }
6132 
6133 void LocationsBuilderARM64::VisitAbs(HAbs* abs) {
6134   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
6135   switch (abs->GetResultType()) {
6136     case DataType::Type::kInt32:
6137     case DataType::Type::kInt64:
6138       locations->SetInAt(0, Location::RequiresRegister());
6139       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6140       break;
6141     case DataType::Type::kFloat32:
6142     case DataType::Type::kFloat64:
6143       locations->SetInAt(0, Location::RequiresFpuRegister());
6144       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6145       break;
6146     default:
6147       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
6148   }
6149 }
6150 
6151 void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) {
6152   switch (abs->GetResultType()) {
6153     case DataType::Type::kInt32:
6154     case DataType::Type::kInt64: {
6155       Register in_reg = InputRegisterAt(abs, 0);
6156       Register out_reg = OutputRegister(abs);
6157       __ Cmp(in_reg, Operand(0));
6158       __ Cneg(out_reg, in_reg, lt);
6159       break;
6160     }
6161     case DataType::Type::kFloat32:
6162     case DataType::Type::kFloat64: {
6163       VRegister in_reg = InputFPRegisterAt(abs, 0);
6164       VRegister out_reg = OutputFPRegister(abs);
6165       __ Fabs(out_reg, in_reg);
6166       break;
6167     }
6168     default:
6169       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
6170   }
6171 }
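
// Editorial sketch (not part of the ART sources): the integer path of VisitAbs above is
//   out = (in < 0) ? -in : in;
// computed with Cmp/Cneg. Negation is written in unsigned arithmetic here so the sketch
// is well defined for INT*_MIN, which wraps back to itself -- the same result Cneg
// produces and the behavior Java's Math.abs specifies.
static int64_t AbsLikeCnegSketch(int64_t in) {
  uint64_t magnitude = (in < 0) ? (0u - static_cast<uint64_t>(in)) : static_cast<uint64_t>(in);
  return static_cast<int64_t>(magnitude);
}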
6172 
6173 void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) {
6174   constructor_fence->SetLocations(nullptr);
6175 }
6176 
6177 void InstructionCodeGeneratorARM64::VisitConstructorFence(
6178     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
6179   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
6180 }
6181 
6182 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
6183   memory_barrier->SetLocations(nullptr);
6184 }
6185 
6186 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
6187   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
6188 }
6189 
6190 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
6191   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6192   DataType::Type return_type = instruction->InputAt(0)->GetType();
6193   locations->SetInAt(0, ARM64ReturnLocation(return_type));
6194 }
6195 
6196 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* ret) {
6197   if (GetGraph()->IsCompilingOsr()) {
6198     // To simplify callers of an OSR method, we put the return value in both
6199     // floating point and core register.
6200     switch (ret->InputAt(0)->GetType()) {
6201       case DataType::Type::kFloat32:
6202         __ Fmov(w0, s0);
6203         break;
6204       case DataType::Type::kFloat64:
6205         __ Fmov(x0, d0);
6206         break;
6207       default:
6208         break;
6209     }
6210   }
6211   codegen_->GenerateFrameExit();
6212 }
6213 
6214 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
6215   instruction->SetLocations(nullptr);
6216 }
6217 
6218 void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) {
6219   codegen_->GenerateFrameExit();
6220 }
6221 
6222 void LocationsBuilderARM64::VisitRor(HRor* ror) {
6223   HandleBinaryOp(ror);
6224 }
6225 
6226 void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
6227   HandleBinaryOp(ror);
6228 }
6229 
6230 void LocationsBuilderARM64::VisitShl(HShl* shl) {
6231   HandleShift(shl);
6232 }
6233 
6234 void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) {
6235   HandleShift(shl);
6236 }
6237 
6238 void LocationsBuilderARM64::VisitShr(HShr* shr) {
6239   HandleShift(shr);
6240 }
6241 
6242 void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) {
6243   HandleShift(shr);
6244 }
6245 
6246 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
6247   HandleBinaryOp(instruction);
6248 }
6249 
6250 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
6251   HandleBinaryOp(instruction);
6252 }
6253 
6254 void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6255   HandleFieldGet(instruction, instruction->GetFieldInfo());
6256 }
6257 
6258 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6259   HandleFieldGet(instruction, instruction->GetFieldInfo());
6260 }
6261 
6262 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6263   HandleFieldSet(instruction);
6264 }
6265 
6266 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6267   HandleFieldSet(instruction,
6268                  instruction->GetFieldInfo(),
6269                  instruction->GetValueCanBeNull(),
6270                  instruction->GetWriteBarrierKind());
6271 }
6272 
6273 void LocationsBuilderARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6274   codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(x0));
6275 }
6276 
6277 void InstructionCodeGeneratorARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6278   __ Mov(w0, instruction->GetFormat()->GetValue());
6279   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6280 }
6281 
6282 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet(
6283     HUnresolvedInstanceFieldGet* instruction) {
6284   FieldAccessCallingConventionARM64 calling_convention;
6285   codegen_->CreateUnresolvedFieldLocationSummary(
6286       instruction, instruction->GetFieldType(), calling_convention);
6287 }
6288 
6289 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet(
6290     HUnresolvedInstanceFieldGet* instruction) {
6291   FieldAccessCallingConventionARM64 calling_convention;
6292   codegen_->GenerateUnresolvedFieldAccess(instruction,
6293                                           instruction->GetFieldType(),
6294                                           instruction->GetFieldIndex(),
6295                                           instruction->GetDexPc(),
6296                                           calling_convention);
6297 }
6298 
6299 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet(
6300     HUnresolvedInstanceFieldSet* instruction) {
6301   FieldAccessCallingConventionARM64 calling_convention;
6302   codegen_->CreateUnresolvedFieldLocationSummary(
6303       instruction, instruction->GetFieldType(), calling_convention);
6304 }
6305 
6306 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet(
6307     HUnresolvedInstanceFieldSet* instruction) {
6308   FieldAccessCallingConventionARM64 calling_convention;
6309   codegen_->GenerateUnresolvedFieldAccess(instruction,
6310                                           instruction->GetFieldType(),
6311                                           instruction->GetFieldIndex(),
6312                                           instruction->GetDexPc(),
6313                                           calling_convention);
6314 }
6315 
6316 void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet(
6317     HUnresolvedStaticFieldGet* instruction) {
6318   FieldAccessCallingConventionARM64 calling_convention;
6319   codegen_->CreateUnresolvedFieldLocationSummary(
6320       instruction, instruction->GetFieldType(), calling_convention);
6321 }
6322 
6323 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet(
6324     HUnresolvedStaticFieldGet* instruction) {
6325   FieldAccessCallingConventionARM64 calling_convention;
6326   codegen_->GenerateUnresolvedFieldAccess(instruction,
6327                                           instruction->GetFieldType(),
6328                                           instruction->GetFieldIndex(),
6329                                           instruction->GetDexPc(),
6330                                           calling_convention);
6331 }
6332 
6333 void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet(
6334     HUnresolvedStaticFieldSet* instruction) {
6335   FieldAccessCallingConventionARM64 calling_convention;
6336   codegen_->CreateUnresolvedFieldLocationSummary(
6337       instruction, instruction->GetFieldType(), calling_convention);
6338 }
6339 
6340 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
6341     HUnresolvedStaticFieldSet* instruction) {
6342   FieldAccessCallingConventionARM64 calling_convention;
6343   codegen_->GenerateUnresolvedFieldAccess(instruction,
6344                                           instruction->GetFieldType(),
6345                                           instruction->GetFieldIndex(),
6346                                           instruction->GetDexPc(),
6347                                           calling_convention);
6348 }
6349 
6350 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
6351   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6352       instruction, LocationSummary::kCallOnSlowPath);
6353   // In the suspend check slow path, there are usually no caller-save registers at all.
6354   // If SIMD instructions are present, however, we force spilling all live SIMD
6355   // registers in full width (since the runtime only saves/restores lower part).
6356   locations->SetCustomSlowPathCallerSaves(
6357       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6358 }
6359 
6360 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
6361   HBasicBlock* block = instruction->GetBlock();
6362   if (block->GetLoopInformation() != nullptr) {
6363     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6364     // The back edge will generate the suspend check.
6365     return;
6366   }
6367   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6368     // The goto will generate the suspend check.
6369     return;
6370   }
6371   GenerateSuspendCheck(instruction, nullptr);
6372   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
6373 }
6374 
6375 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
6376   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6377       instruction, LocationSummary::kCallOnMainOnly);
6378   InvokeRuntimeCallingConvention calling_convention;
6379   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
6380 }
6381 
6382 void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
6383   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6384   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6385 }
6386 
6387 void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) {
6388   LocationSummary* locations =
6389       new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
6390   DataType::Type input_type = conversion->GetInputType();
6391   DataType::Type result_type = conversion->GetResultType();
6392   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
6393       << input_type << " -> " << result_type;
6394   if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) ||
6395       (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) {
6396     LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
6397   }
6398 
6399   if (DataType::IsFloatingPointType(input_type)) {
6400     locations->SetInAt(0, Location::RequiresFpuRegister());
6401   } else {
6402     locations->SetInAt(0, Location::RequiresRegister());
6403   }
6404 
6405   if (DataType::IsFloatingPointType(result_type)) {
6406     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6407   } else {
6408     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6409   }
6410 }
6411 
6412 void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) {
6413   DataType::Type result_type = conversion->GetResultType();
6414   DataType::Type input_type = conversion->GetInputType();
6415 
6416   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
6417       << input_type << " -> " << result_type;
6418 
6419   if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) {
6420     int result_size = DataType::Size(result_type);
6421     int input_size = DataType::Size(input_type);
6422     int min_size = std::min(result_size, input_size);
6423     Register output = OutputRegister(conversion);
6424     Register source = InputRegisterAt(conversion, 0);
6425     if (result_type == DataType::Type::kInt32 && input_type == DataType::Type::kInt64) {
6426       // 'int' values are used directly as W registers, discarding the top
6427       // bits, so we don't need to sign-extend and can just perform a move.
6428       // We do not pass the `kDiscardForSameWReg` argument to force clearing the
6429       // top 32 bits of the target register. We theoretically could leave those
6430       // bits unchanged, but we would have to make sure that no code uses a
6431       // 32bit input value as a 64bit value assuming that the top 32 bits are
6432       // zero.
6433       __ Mov(output.W(), source.W());
6434     } else if (DataType::IsUnsignedType(result_type) ||
6435                (DataType::IsUnsignedType(input_type) && input_size < result_size)) {
6436       __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, result_size * kBitsPerByte);
6437     } else {
6438       __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
6439     }
6440   } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) {
6441     __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
6442   } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) {
6443     CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64);
6444     __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
6445   } else if (DataType::IsFloatingPointType(result_type) &&
6446              DataType::IsFloatingPointType(input_type)) {
6447     __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
6448   } else {
6449     LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
6450                 << " to " << result_type;
6451   }
6452 }
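
// Editorial sketch (not part of the ART sources): the integral-to-integral paths above
// reduce to three cases. int64 -> int32 is a plain truncation to the low 32 bits (the
// W-register Mov); conversions to an unsigned result type, or widening from a narrower
// unsigned input, zero-extend the low bits (Ubfx); everything else sign-extends the low
// `min_size` bits (Sbfx). For example, the int64 -> int32 case behaves like:
static int32_t TruncateInt64ToInt32Sketch(int64_t value) {
  return static_cast<int32_t>(value);  // Keeps bits [31:0], like `mov w_out, w_src`.
}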
6453 
6454 void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
6455   HandleShift(ushr);
6456 }
6457 
6458 void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
6459   HandleShift(ushr);
6460 }
6461 
6462 void LocationsBuilderARM64::VisitXor(HXor* instruction) {
6463   HandleBinaryOp(instruction);
6464 }
6465 
6466 void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
6467   HandleBinaryOp(instruction);
6468 }
6469 
6470 void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6471   // Nothing to do, this should be removed during prepare for register allocator.
6472   LOG(FATAL) << "Unreachable";
6473 }
6474 
6475 void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6476   // Nothing to do, this should be removed during prepare for register allocator.
6477   LOG(FATAL) << "Unreachable";
6478 }
6479 
6480 // Simple implementation of packed switch - generate cascaded compare/jumps.
6481 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6482   LocationSummary* locations =
6483       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
6484   locations->SetInAt(0, Location::RequiresRegister());
6485 }
6486 
6487 void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6488   int32_t lower_bound = switch_instr->GetStartValue();
6489   uint32_t num_entries = switch_instr->GetNumEntries();
6490   Register value_reg = InputRegisterAt(switch_instr, 0);
6491   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
6492 
6493   // Roughly set 16 as the maximum average number of instructions generated per HIR in a graph.
6494   static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
6495   // ADR has a limited range (+/-1MB), so we set a threshold for the number of HIRs in the graph to
6496   // make sure we don't emit it if the target may run out of range.
6497   // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
6498   // ranges and emit the tables only as required.
6499   static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction;
6500 
6501   if (num_entries <= kPackedSwitchCompareJumpThreshold ||
6502       // Current instruction id is an upper bound of the number of HIRs in the graph.
6503       GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
6504     // Create a series of compare/jumps.
6505     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
6506     Register temp = temps.AcquireW();
6507     __ Subs(temp, value_reg, Operand(lower_bound));
6508 
6509     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
6510     // Jump to successors[0] if value == lower_bound.
6511     __ B(eq, codegen_->GetLabelOf(successors[0]));
6512     int32_t last_index = 0;
6513     for (; num_entries - last_index > 2; last_index += 2) {
6514       __ Subs(temp, temp, Operand(2));
6515       // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
6516       __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
6517       // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
6518       __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
6519     }
6520     if (num_entries - last_index == 2) {
6521       // The last missing case_value.
6522       __ Cmp(temp, Operand(1));
6523       __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
6524     }
6525 
6526     // And the default for any other value.
6527     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
6528       __ B(codegen_->GetLabelOf(default_block));
6529     }
6530   } else {
6531     JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr);
6532 
6533     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
6534 
6535     // The instructions below should use at most one blocked register. Since there are two
6536     // blocked registers, we are free to block one of them here.
6537     Register temp_w = temps.AcquireW();
6538     Register index;
6539     // Remove the bias.
6540     if (lower_bound != 0) {
6541       index = temp_w;
6542       __ Sub(index, value_reg, Operand(lower_bound));
6543     } else {
6544       index = value_reg;
6545     }
6546 
6547     // Jump to the default block if the index is out of range.
6548     __ Cmp(index, Operand(num_entries));
6549     __ B(hs, codegen_->GetLabelOf(default_block));
6550 
6551     // In the current VIXL implementation, encoding the immediate value for Adr does not
6552     // require any blocked registers, so we are free to use both VIXL blocked registers to
6553     // reduce register pressure.
6554     Register table_base = temps.AcquireX();
6555     // Load jump offset from the table.
6556     __ Adr(table_base, jump_table->GetTableStartLabel());
6557     Register jump_offset = temp_w;
6558     __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
6559 
6560     // Jump to the target block by branching to table_base (PC-relative) + offset.
6561     Register target_address = table_base;
6562     __ Add(target_address, table_base, Operand(jump_offset, SXTW));
6563     __ Br(target_address);
6564   }
6565 }
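
// Editorial sketch (not part of the ART sources): the decision rule VisitPackedSwitch
// applies above, expressed as a standalone predicate. `num_entries` is the number of
// switch cases and `current_instruction_id` is used as an upper bound on the number of
// HIRs in the graph, which in turn bounds the distance the jump table's ADR must reach.
static bool UseCompareJumpSequenceSketch(uint32_t num_entries, int32_t current_instruction_id) {
  static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
  static constexpr int32_t kJumpTableInstructionThreshold = 1 * MB / kMaxExpectedSizePerHInstruction;
  return num_entries <= kPackedSwitchCompareJumpThreshold ||
         current_instruction_id > kJumpTableInstructionThreshold;
}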
6566 
6567 void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
6568     HInstruction* instruction,
6569     Location out,
6570     uint32_t offset,
6571     Location maybe_temp,
6572     ReadBarrierOption read_barrier_option) {
6573   DataType::Type type = DataType::Type::kReference;
6574   Register out_reg = RegisterFrom(out, type);
6575   if (read_barrier_option == kWithReadBarrier) {
6576     CHECK(gUseReadBarrier);
6577     if (kUseBakerReadBarrier) {
6578       // Load with fast path based Baker's read barrier.
6579       // /* HeapReference<Object> */ out = *(out + offset)
6580       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6581                                                       out,
6582                                                       out_reg,
6583                                                       offset,
6584                                                       maybe_temp,
6585                                                       /* needs_null_check= */ false,
6586                                                       /* use_load_acquire= */ false);
6587     } else {
6588       // Load with slow path based read barrier.
6589       // Save the value of `out` into `maybe_temp` before overwriting it
6590       // in the following move operation, as we will need it for the
6591       // read barrier below.
6592       Register temp_reg = RegisterFrom(maybe_temp, type);
6593       __ Mov(temp_reg, out_reg);
6594       // /* HeapReference<Object> */ out = *(out + offset)
6595       __ Ldr(out_reg, HeapOperand(out_reg, offset));
6596       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
6597     }
6598   } else {
6599     // Plain load with no read barrier.
6600     // /* HeapReference<Object> */ out = *(out + offset)
6601     __ Ldr(out_reg, HeapOperand(out_reg, offset));
6602     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
6603   }
6604 }
6605 
6606 void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
6607     HInstruction* instruction,
6608     Location out,
6609     Location obj,
6610     uint32_t offset,
6611     Location maybe_temp,
6612     ReadBarrierOption read_barrier_option) {
6613   DataType::Type type = DataType::Type::kReference;
6614   Register out_reg = RegisterFrom(out, type);
6615   Register obj_reg = RegisterFrom(obj, type);
6616   if (read_barrier_option == kWithReadBarrier) {
6617     CHECK(gUseReadBarrier);
6618     if (kUseBakerReadBarrier) {
6619       // Load with fast path based Baker's read barrier.
6620       // /* HeapReference<Object> */ out = *(obj + offset)
6621       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
6622                                                       out,
6623                                                       obj_reg,
6624                                                       offset,
6625                                                       maybe_temp,
6626                                                       /* needs_null_check= */ false,
6627                                                       /* use_load_acquire= */ false);
6628     } else {
6629       // Load with slow path based read barrier.
6630       // /* HeapReference<Object> */ out = *(obj + offset)
6631       __ Ldr(out_reg, HeapOperand(obj_reg, offset));
6632       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
6633     }
6634   } else {
6635     // Plain load with no read barrier.
6636     // /* HeapReference<Object> */ out = *(obj + offset)
6637     __ Ldr(out_reg, HeapOperand(obj_reg, offset));
6638     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
6639   }
6640 }
6641 
6642 void CodeGeneratorARM64::GenerateGcRootFieldLoad(
6643     HInstruction* instruction,
6644     Location root,
6645     Register obj,
6646     uint32_t offset,
6647     vixl::aarch64::Label* fixup_label,
6648     ReadBarrierOption read_barrier_option) {
6649   DCHECK(fixup_label == nullptr || offset == 0u);
6650   Register root_reg = RegisterFrom(root, DataType::Type::kReference);
6651   if (read_barrier_option == kWithReadBarrier) {
6652     DCHECK(gUseReadBarrier);
6653     if (kUseBakerReadBarrier) {
6654       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
6655       // Baker's read barrier are used.
6656 
6657       // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
6658       // the Marking Register) to decide whether we need to enter
6659       // the slow path to mark the GC root.
6660       //
6661       // We use shared thunks for the slow path; shared within the method
6662       // for JIT, across methods for AOT. That thunk checks the reference
6663       // and jumps to the entrypoint if needed.
6664       //
6665       //     lr = &return_address;
6666       //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
6667       //     if (mr) {  // Thread::Current()->GetIsGcMarking()
6668       //       goto gc_root_thunk<root_reg>(lr)
6669       //     }
6670       //   return_address:
6671 
6672       UseScratchRegisterScope temps(GetVIXLAssembler());
6673       DCHECK(temps.IsAvailable(ip0));
6674       DCHECK(temps.IsAvailable(ip1));
6675       temps.Exclude(ip0, ip1);
6676       uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
6677 
6678       ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
6679       vixl::aarch64::Label return_address;
6680       __ adr(lr, &return_address);
6681       if (fixup_label != nullptr) {
6682         __ bind(fixup_label);
6683       }
6684       static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
6685                     "GC root LDR must be 2 instructions (8B) before the return address label.");
6686       __ ldr(root_reg, MemOperand(obj.X(), offset));
6687       EmitBakerReadBarrierCbnz(custom_data);
6688       __ bind(&return_address);
6689     } else {
6690       // GC root loaded through a slow path for read barriers other
6691       // than Baker's.
6692       // /* GcRoot<mirror::Object>* */ root = obj + offset
6693       if (fixup_label == nullptr) {
6694         __ Add(root_reg.X(), obj.X(), offset);
6695       } else {
6696         EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
6697       }
6698       // /* mirror::Object* */ root = root->Read()
6699       GenerateReadBarrierForRootSlow(instruction, root, root);
6700     }
6701   } else {
6702     // Plain GC root load with no read barrier.
6703     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
6704     if (fixup_label == nullptr) {
6705       __ Ldr(root_reg, MemOperand(obj, offset));
6706     } else {
6707       EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
6708     }
6709     // Note that GC roots are not affected by heap poisoning, thus we
6710     // do not have to unpoison `root_reg` here.
6711   }
6712   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
6713 }
6714 
6715 void CodeGeneratorARM64::GenerateIntrinsicCasMoveWithBakerReadBarrier(
6716     vixl::aarch64::Register marked_old_value,
6717     vixl::aarch64::Register old_value) {
6718   DCHECK(gUseReadBarrier);
6719   DCHECK(kUseBakerReadBarrier);
6720 
6721   // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
6722   uint32_t custom_data = EncodeBakerReadBarrierGcRootData(marked_old_value.GetCode());
6723 
6724   ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
6725   vixl::aarch64::Label return_address;
6726   __ adr(lr, &return_address);
6727   static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
6728                 "GC root LDR must be 2 instructions (8B) before the return address label.");
6729   __ mov(marked_old_value, old_value);
6730   EmitBakerReadBarrierCbnz(custom_data);
6731   __ bind(&return_address);
6732 }
6733 
6734 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6735                                                                Location ref,
6736                                                                vixl::aarch64::Register obj,
6737                                                                const vixl::aarch64::MemOperand& src,
6738                                                                bool needs_null_check,
6739                                                                bool use_load_acquire) {
6740   DCHECK(gUseReadBarrier);
6741   DCHECK(kUseBakerReadBarrier);
6742 
6743   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
6744   // Marking Register) to decide whether we need to enter the slow
6745   // path to mark the reference. Then, in the slow path, check the
6746   // gray bit in the lock word of the reference's holder (`obj`) to
6747   // decide whether to mark `ref` or not.
6748   //
6749   // We use shared thunks for the slow path; shared within the method
6750   // for JIT, across methods for AOT. That thunk checks the holder
6751   // and jumps to the entrypoint if needed. If the holder is not gray,
6752   // it creates a fake dependency and returns to the LDR instruction.
6753   //
6754   //     lr = &gray_return_address;
6755   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
6756   //       goto field_thunk<holder_reg, base_reg, use_load_acquire>(lr)
6757   //     }
6758   //   not_gray_return_address:
6759   //     // Original reference load. If the offset is too large to fit
6760   //     // into LDR, we use an adjusted base register here.
6761   //     HeapReference<mirror::Object> reference = *(obj+offset);
6762   //   gray_return_address:
6763 
6764   DCHECK(src.GetAddrMode() == vixl::aarch64::Offset);
6765   DCHECK_ALIGNED(src.GetOffset(), sizeof(mirror::HeapReference<mirror::Object>));
6766 
6767   UseScratchRegisterScope temps(GetVIXLAssembler());
6768   DCHECK(temps.IsAvailable(ip0));
6769   DCHECK(temps.IsAvailable(ip1));
6770   temps.Exclude(ip0, ip1);
6771   uint32_t custom_data = use_load_acquire
6772       ? EncodeBakerReadBarrierAcquireData(src.GetBaseRegister().GetCode(), obj.GetCode())
6773       : EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode());
6774 
6775   {
6776     ExactAssemblyScope guard(GetVIXLAssembler(),
6777                              (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
6778     vixl::aarch64::Label return_address;
6779     __ adr(lr, &return_address);
6780     EmitBakerReadBarrierCbnz(custom_data);
6781     static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
6782                   "Field LDR must be 1 instruction (4B) before the return address label; "
6783                   " 2 instructions (8B) for heap poisoning.");
6784     Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
6785     if (use_load_acquire) {
6786       DCHECK_EQ(src.GetOffset(), 0);
6787       __ ldar(ref_reg, src);
6788     } else {
6789       __ ldr(ref_reg, src);
6790     }
6791     if (needs_null_check) {
6792       MaybeRecordImplicitNullCheck(instruction);
6793     }
6794     // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
6795     // macro instructions disallowed in ExactAssemblyScope.
6796     if (kPoisonHeapReferences) {
6797       __ neg(ref_reg, Operand(ref_reg));
6798     }
6799     __ bind(&return_address);
6800   }
6801   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
6802 }
6803 
6804 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6805                                                                Location ref,
6806                                                                Register obj,
6807                                                                uint32_t offset,
6808                                                                Location maybe_temp,
6809                                                                bool needs_null_check,
6810                                                                bool use_load_acquire) {
6811   DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
6812   Register base = obj;
6813   if (use_load_acquire) {
6814     DCHECK(maybe_temp.IsRegister());
6815     base = WRegisterFrom(maybe_temp);
6816     __ Add(base, obj, offset);
6817     offset = 0u;
6818   } else if (offset >= kReferenceLoadMinFarOffset) {
6819     DCHECK(maybe_temp.IsRegister());
6820     base = WRegisterFrom(maybe_temp);
6821     static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
6822     __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
6823     offset &= (kReferenceLoadMinFarOffset - 1u);
6824   }
6825   MemOperand src(base.X(), offset);
6826   GenerateFieldLoadWithBakerReadBarrier(
6827       instruction, ref, obj, src, needs_null_check, use_load_acquire);
6828 }
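
// Editorial sketch (not part of the ART sources): how the overload above splits a "far"
// field offset. The aligned high part is folded into a temporary base register and only
// the small residual stays in the LDR immediate, so the load still matches the encoding
// the Baker read barrier thunk inspects. The only property of kReferenceLoadMinFarOffset
// used here is that it is a power of two (checked by the static_assert above).
static void SplitFarReferenceOffsetSketch(uint32_t offset,
                                          uint32_t min_far_offset,
                                          uint32_t* base_adjustment,
                                          uint32_t* residual) {
  *base_adjustment = offset & ~(min_far_offset - 1u);  // Folded into the base via the Add above.
  *residual = offset & (min_far_offset - 1u);          // Remaining LDR offset.
}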
6829 
6830 void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction,
6831                                                                Location ref,
6832                                                                Register obj,
6833                                                                uint32_t data_offset,
6834                                                                Location index,
6835                                                                bool needs_null_check) {
6836   DCHECK(gUseReadBarrier);
6837   DCHECK(kUseBakerReadBarrier);
6838 
6839   static_assert(
6840       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6841       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6842   size_t scale_factor = DataType::SizeShift(DataType::Type::kReference);
6843 
6844   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
6845   // Marking Register) to decide whether we need to enter the slow
6846   // path to mark the reference. Then, in the slow path, check the
6847   // gray bit in the lock word of the reference's holder (`obj`) to
6848   // decide whether to mark `ref` or not.
6849   //
6850   // We use shared thunks for the slow path; shared within the method
6851   // for JIT, across methods for AOT. That thunk checks the holder
6852   // and jumps to the entrypoint if needed. If the holder is not gray,
6853   // it creates a fake dependency and returns to the LDR instruction.
6854   //
6855   //     lr = &gray_return_address;
6856   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
6857   //       goto array_thunk<base_reg>(lr)
6858   //     }
6859   //   not_gray_return_address:
6860   //     // Original reference load. If the offset is too large to fit
6861   //     // into LDR, we use an adjusted base register here.
6862   //     HeapReference<mirror::Object> reference = data[index];
6863   //   gray_return_address:
6864 
6865   DCHECK(index.IsValid());
6866   Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
6867   Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
6868 
6869   UseScratchRegisterScope temps(GetVIXLAssembler());
6870   DCHECK(temps.IsAvailable(ip0));
6871   DCHECK(temps.IsAvailable(ip1));
6872   temps.Exclude(ip0, ip1);
6873 
6874   Register temp;
6875   if (instruction->GetArray()->IsIntermediateAddress()) {
6876     // We do not need to compute the intermediate address from the array: the
6877     // input instruction has done it already. See the comment in
6878     // `TryExtractArrayAccessAddress()`.
6879     if (kIsDebugBuild) {
6880       HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
6881       DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
6882     }
6883     temp = obj;
6884   } else {
6885     temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0));
6886     __ Add(temp.X(), obj.X(), Operand(data_offset));
6887   }
6888 
6889   uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode());
6890 
6891   {
6892     ExactAssemblyScope guard(GetVIXLAssembler(),
6893                              (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
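    // The scope size matches exactly what is emitted below: adr + the cbnz emitted by
    // EmitBakerReadBarrierCbnz + ldr, plus one extra neg when heap poisoning is enabled.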
6894     vixl::aarch64::Label return_address;
6895     __ adr(lr, &return_address);
6896     EmitBakerReadBarrierCbnz(custom_data);
6897     static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
6898                   "Array LDR must be 1 instruction (4B) before the return address label; "
6899                   " 2 instructions (8B) for heap poisoning.");
6900     __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
6901     DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
6902     // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
6903     // macro instructions disallowed in ExactAssemblyScope.
6904     if (kPoisonHeapReferences) {
6905       __ neg(ref_reg, Operand(ref_reg));
6906     }
6907     __ bind(&return_address);
6908   }
6909   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
6910 }
6911 
6912 void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
6913   // The following condition is a compile-time one, so it does not have a run-time cost.
6914   if (kIsDebugBuild && gUseReadBarrier && kUseBakerReadBarrier) {
6915     // The following condition is a run-time one; it is executed after the
6916     // previous compile-time test, to avoid penalizing non-debug builds.
6917     if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
6918       UseScratchRegisterScope temps(GetVIXLAssembler());
6919       Register temp = temp_loc.IsValid() ? WRegisterFrom(temp_loc) : temps.AcquireW();
6920       GetAssembler()->GenerateMarkingRegisterCheck(temp, code);
6921     }
6922   }
6923 }
6924 
6925 SlowPathCodeARM64* CodeGeneratorARM64::AddReadBarrierSlowPath(HInstruction* instruction,
6926                                                               Location out,
6927                                                               Location ref,
6928                                                               Location obj,
6929                                                               uint32_t offset,
6930                                                               Location index) {
6931   SlowPathCodeARM64* slow_path = new (GetScopedAllocator())
6932       ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
6933   AddSlowPath(slow_path);
6934   return slow_path;
6935 }
6936 
6937 void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
6938                                                  Location out,
6939                                                  Location ref,
6940                                                  Location obj,
6941                                                  uint32_t offset,
6942                                                  Location index) {
6943   DCHECK(gUseReadBarrier);
6944 
6945   // Insert a slow path based read barrier *after* the reference load.
6946   //
6947   // If heap poisoning is enabled, the unpoisoning of the loaded
6948   // reference will be carried out by the runtime within the slow
6949   // path.
6950   //
6951   // Note that `ref` currently does not get unpoisoned (when heap
6952   // poisoning is enabled), which is alright as the `ref` argument is
6953   // not used by the artReadBarrierSlow entry point.
6954   //
6955   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6956   SlowPathCodeARM64* slow_path = AddReadBarrierSlowPath(instruction, out, ref, obj, offset, index);
6957 
6958   __ B(slow_path->GetEntryLabel());
6959   __ Bind(slow_path->GetExitLabel());
6960 }
6961 
6962 void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6963                                                       Location out,
6964                                                       Location ref,
6965                                                       Location obj,
6966                                                       uint32_t offset,
6967                                                       Location index) {
6968   if (gUseReadBarrier) {
6969     // Baker's read barriers shall be handled by the fast path
6970     // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
6971     DCHECK(!kUseBakerReadBarrier);
6972     // If heap poisoning is enabled, unpoisoning will be taken care of
6973     // by the runtime within the slow path.
6974     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6975   } else if (kPoisonHeapReferences) {
6976     GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
6977   }
6978 }
6979 
6980 void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6981                                                         Location out,
6982                                                         Location root) {
6983   DCHECK(gUseReadBarrier);
6984 
6985   // Insert a slow path based read barrier *after* the GC root load.
6986   //
6987   // Note that GC roots are not affected by heap poisoning, so we do
6988   // not need to do anything special for this here.
6989   SlowPathCodeARM64* slow_path =
6990       new (GetScopedAllocator()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
6991   AddSlowPath(slow_path);
6992 
6993   __ B(slow_path->GetEntryLabel());
6994   __ Bind(slow_path->GetExitLabel());
6995 }
6996 
6997 void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
6998   LocationSummary* locations =
6999       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7000   locations->SetInAt(0, Location::RequiresRegister());
7001   locations->SetOut(Location::RequiresRegister());
7002 }
7003 
7004 void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
7005   LocationSummary* locations = instruction->GetLocations();
7006   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
7007     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
7008         instruction->GetIndex(), kArm64PointerSize).SizeValue();
7009     __ Ldr(XRegisterFrom(locations->Out()),
7010            MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
7011   } else {
7012     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
7013         instruction->GetIndex(), kArm64PointerSize));
7014     __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
7015         mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
7016     __ Ldr(XRegisterFrom(locations->Out()),
7017            MemOperand(XRegisterFrom(locations->Out()), method_offset));
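    // Two dependent loads: the first fetches the ImTable pointer embedded in the class,
    // the second indexes into that table to read the target method entry.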
7018   }
7019 }
7020 
7021 static void PatchJitRootUse(uint8_t* code,
7022                             const uint8_t* roots_data,
7023                             vixl::aarch64::Literal<uint32_t>* literal,
7024                             uint64_t index_in_table) {
7025   uint32_t literal_offset = literal->GetOffset();
7026   uintptr_t address =
7027       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
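  // Illustrative values only: with roots_data at 0x70000000, index_in_table == 3 and
  // sizeof(GcRoot<mirror::Object>) assumed to be 4 (a compressed reference), the literal
  // below is patched to 0x7000000c, i.e. the address of the table slot, not the root itself.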
7028   uint8_t* data = code + literal_offset;
7029   reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
7030 }
7031 
7032 void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
7033   for (const auto& entry : jit_string_patches_) {
7034     const StringReference& string_reference = entry.first;
7035     vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
7036     uint64_t index_in_table = GetJitStringRootIndex(string_reference);
7037     PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
7038   }
7039   for (const auto& entry : jit_class_patches_) {
7040     const TypeReference& type_reference = entry.first;
7041     vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
7042     uint64_t index_in_table = GetJitClassRootIndex(type_reference);
7043     PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
7044   }
7045 }
7046 
7047 MemOperand InstructionCodeGeneratorARM64::VecNEONAddress(
7048     HVecMemoryOperation* instruction,
7049     UseScratchRegisterScope* temps_scope,
7050     size_t size,
7051     bool is_string_char_at,
7052     /*out*/ Register* scratch) {
7053   LocationSummary* locations = instruction->GetLocations();
7054   Register base = InputRegisterAt(instruction, 0);
7055 
7056   if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
7057     DCHECK(!is_string_char_at);
7058     return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
7059   }
7060 
7061   Location index = locations->InAt(1);
7062   uint32_t offset = is_string_char_at
7063       ? mirror::String::ValueOffset().Uint32Value()
7064       : mirror::Array::DataOffset(size).Uint32Value();
7065   size_t shift = ComponentSizeShiftWidth(size);
7066 
7067   // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
7068   DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
7069 
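  // A rough picture of the two cases below (illustrative numbers: for a 4-byte element
  // type, mirror::Array::DataOffset(4) is typically 12 and shift == 2):
  //   constant index i:  HeapOperand(base, 12 + (i << 2))
  //   register index w:  scratch = base + (w << 2); HeapOperand(scratch, 12)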
7070   if (index.IsConstant()) {
7071     offset += Int64FromLocation(index) << shift;
7072     return HeapOperand(base, offset);
7073   } else {
7074     *scratch = temps_scope->AcquireSameSizeAs(base);
7075     __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
7076     return HeapOperand(*scratch, offset);
7077   }
7078 }
7079 
7080 SVEMemOperand InstructionCodeGeneratorARM64::VecSVEAddress(
7081     HVecMemoryOperation* instruction,
7082     UseScratchRegisterScope* temps_scope,
7083     size_t size,
7084     bool is_string_char_at,
7085     /*out*/ Register* scratch) {
7086   LocationSummary* locations = instruction->GetLocations();
7087   Register base = InputRegisterAt(instruction, 0);
7088   Location index = locations->InAt(1);
7089 
7090   DCHECK(!instruction->InputAt(1)->IsIntermediateAddressIndex());
7091   DCHECK(!index.IsConstant());
7092 
7093   uint32_t offset = is_string_char_at
7094       ? mirror::String::ValueOffset().Uint32Value()
7095       : mirror::Array::DataOffset(size).Uint32Value();
7096   size_t shift = ComponentSizeShiftWidth(size);
7097 
7098   if (instruction->InputAt(0)->IsIntermediateAddress()) {
7099     return SVEMemOperand(base.X(), XRegisterFrom(index), LSL, shift);
7100   }
7101 
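  // Unlike the NEON case above, the byte offset cannot ride on the addressing mode here:
  // the SVE scalar-plus-scalar form used below is [Xn, Xm, LSL #shift] with no immediate,
  // so the data offset is folded into a scratch base register first.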
7102   *scratch = temps_scope->AcquireSameSizeAs(base);
7103   __ Add(*scratch, base, offset);
7104   return SVEMemOperand(scratch->X(), XRegisterFrom(index), LSL, shift);
7105 }
7106 
7107 #undef __
7108 #undef QUICK_ENTRY_POINT
7109 
7110 #define __ assembler.GetVIXLAssembler()->
7111 
7112 static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
7113                                      vixl::aarch64::Register base_reg,
7114                                      vixl::aarch64::MemOperand& lock_word,
7115                                      vixl::aarch64::Label* slow_path,
7116                                      vixl::aarch64::Label* throw_npe = nullptr) {
7117   vixl::aarch64::Label throw_npe_cont;
7118   // Load the lock word containing the rb_state.
7119   __ Ldr(ip0.W(), lock_word);
7120   // Given the numeric representation, it's enough to check the low bit of the rb_state.
7121   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
7122   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
7123   __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path);
7124   static_assert(
7125       BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
7126       "Field and array LDR offsets must be the same to reuse the same code.");
7127   // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
7128   if (throw_npe != nullptr) {
7129     __ Bind(&throw_npe_cont);
7130   }
7131   // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
7132   static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
7133                 "Field LDR must be 1 instruction (4B) before the return address label; "
7134                 " 2 instructions (8B) for heap poisoning.");
7135   __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
7136   // Introduce a dependency on the lock_word including rb_state,
7137   // to prevent load-load reordering, and without using
7138   // a memory barrier (which would be more expensive).
7139   __ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
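  // The value added here is always zero: the 32-bit LDR above zero-extends into ip0, so
  // shifting its X view right by 32 yields 0. base_reg is unchanged, but the CPU still
  // sees an address dependency on the loaded lock word.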
7140   __ Br(lr);          // And return back to the function.
7141   if (throw_npe != nullptr) {
7142     // Clear IP0 before returning to the fast path.
7143     __ Bind(throw_npe);
7144     __ Mov(ip0.X(), xzr);
7145     __ B(&throw_npe_cont);
7146   }
7147   // Note: The fake dependency is unnecessary for the slow path.
7148 }
7149 
7150 // Load the read barrier introspection entrypoint in register `entrypoint`.
7151 static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler,
7152                                                        vixl::aarch64::Register entrypoint) {
7153   // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
7154   DCHECK_EQ(ip0.GetCode(), 16u);
7155   const int32_t entry_point_offset =
7156       Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
7157   __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
7158 }
7159 
7160 void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
7161                                                       uint32_t encoded_data,
7162                                                       /*out*/ std::string* debug_name) {
7163   BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
7164   switch (kind) {
7165     case BakerReadBarrierKind::kField:
7166     case BakerReadBarrierKind::kAcquire: {
7167       Register base_reg =
7168           vixl::aarch64::XRegister(BakerReadBarrierFirstRegField::Decode(encoded_data));
7169       CheckValidReg(base_reg.GetCode());
7170       Register holder_reg =
7171           vixl::aarch64::XRegister(BakerReadBarrierSecondRegField::Decode(encoded_data));
7172       CheckValidReg(holder_reg.GetCode());
7173       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
7174       temps.Exclude(ip0, ip1);
7175       // In the case of a field load (with relaxed semantic), if `base_reg` differs from
7176       // `holder_reg`, the offset was too large and we must have emitted (during the construction
7177       // of the HIR graph, see `art::HInstructionBuilder::BuildInstanceFieldAccess`) and preserved
7178       // (see `art::PrepareForRegisterAllocation::VisitNullCheck`) an explicit null check before
7179       // the load. Otherwise, for implicit null checks, we need to null-check the holder as we do
7180       // not necessarily do that check before going to the thunk.
7181       //
7182       // In the case of a field load with load-acquire semantics (where `base_reg` always differs
7183       // from `holder_reg`), we also need an explicit null check when implicit null checks are
7184       // allowed, as we do not emit one before going to the thunk.
7185       vixl::aarch64::Label throw_npe_label;
7186       vixl::aarch64::Label* throw_npe = nullptr;
7187       if (GetCompilerOptions().GetImplicitNullChecks() &&
7188           (holder_reg.Is(base_reg) || (kind == BakerReadBarrierKind::kAcquire))) {
7189         throw_npe = &throw_npe_label;
7190         __ Cbz(holder_reg.W(), throw_npe);
7191       }
7192       // Check if the holder is gray and, if not, add fake dependency to the base register
7193       // and return to the LDR instruction to load the reference. Otherwise, use introspection
7194       // to load the reference and call the entrypoint that performs further checks on the
7195       // reference and marks it if needed.
7196       vixl::aarch64::Label slow_path;
7197       MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
7198       EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, throw_npe);
7199       __ Bind(&slow_path);
7200       if (kind == BakerReadBarrierKind::kField) {
7201         MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
7202         __ Ldr(ip0.W(), ldr_address);         // Load the LDR (immediate) unsigned offset.
7203         LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
7204         __ Ubfx(ip0.W(), ip0.W(), 10, 12);    // Extract the offset.
7205         __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2));   // Load the reference.
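        // The Ubfx/Ldr pair above relies on the A64 LDR (immediate, unsigned offset)
        // encoding: the 12-bit scaled immediate sits in bits [21:10], and for a 32-bit
        // load it counts 4-byte units, hence the rescaling with `LSL, 2`.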
7206       } else {
7207         DCHECK(kind == BakerReadBarrierKind::kAcquire);
7208         DCHECK(!base_reg.Is(holder_reg));
7209         LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
7210         __ Ldar(ip0.W(), MemOperand(base_reg));
7211       }
7212       // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
7213       __ Br(ip1);                           // Jump to the entrypoint.
7214       break;
7215     }
7216     case BakerReadBarrierKind::kArray: {
7217       Register base_reg =
7218           vixl::aarch64::XRegister(BakerReadBarrierFirstRegField::Decode(encoded_data));
7219       CheckValidReg(base_reg.GetCode());
7220       DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
7221                 BakerReadBarrierSecondRegField::Decode(encoded_data));
7222       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
7223       temps.Exclude(ip0, ip1);
7224       vixl::aarch64::Label slow_path;
7225       int32_t data_offset =
7226           mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
7227       MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
7228       DCHECK_LT(lock_word.GetOffset(), 0);
7229       EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
7230       __ Bind(&slow_path);
7231       MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
7232       __ Ldr(ip0.W(), ldr_address);         // Load the LDR (register) unsigned offset.
7233       LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
7234       __ Ubfx(ip0, ip0, 16, 6);             // Extract the index register, plus 32 (bit 21 is set).
7235       __ Bfi(ip1, ip0, 3, 6);               // Insert ip0 to the entrypoint address to create
7236                                             // a switch case target based on the index register.
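      // The 6-bit extract works because the Rm field of the A64 LDR (register) encoding
      // occupies bits [20:16] with bit 21 set, yielding "index register + 32"; inserting
      // it at bit 3 then selects a switch case, presumably spaced 8 bytes (two
      // instructions) apart in the introspection entrypoint.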
7237       __ Mov(ip0, base_reg);                // Move the base register to ip0.
7238       __ Br(ip1);                           // Jump to the entrypoint's array switch case.
7239       break;
7240     }
7241     case BakerReadBarrierKind::kGcRoot: {
7242       // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
7243       // and it does not have a forwarding address), call the correct introspection entrypoint;
7244       // otherwise return the reference (or the extracted forwarding address).
7245       // There is no gray bit check for GC roots.
7246       Register root_reg =
7247           vixl::aarch64::WRegister(BakerReadBarrierFirstRegField::Decode(encoded_data));
7248       CheckValidReg(root_reg.GetCode());
7249       DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
7250                 BakerReadBarrierSecondRegField::Decode(encoded_data));
7251       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
7252       temps.Exclude(ip0, ip1);
7253       vixl::aarch64::Label return_label, not_marked, forwarding_address;
7254       __ Cbz(root_reg, &return_label);
7255       MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value());
7256       __ Ldr(ip0.W(), lock_word);
7257       __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, &not_marked);
7258       __ Bind(&return_label);
7259       __ Br(lr);
7260       __ Bind(&not_marked);
7261       __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1));
7262       __ B(&forwarding_address, mi);
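      // The Tst/B.mi pair above branches when the lock word encodes a forwarding address:
      // ANDing the word with itself shifted left by one sets the sign flag only if the two
      // most significant (state) bits are both 1.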
7263       LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
7264       // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to
7265       // art_quick_read_barrier_mark_introspection_gc_roots.
7266       __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
7267       __ Mov(ip0.W(), root_reg);
7268       __ Br(ip1);
7269       __ Bind(&forwarding_address);
7270       __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift);
7271       __ Br(lr);
7272       break;
7273     }
7274     default:
7275       LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
7276       UNREACHABLE();
7277   }
7278 
7279   // For JIT, the slow path is considered part of the compiled method,
7280   // so JIT should pass null as `debug_name`.
7281   DCHECK_IMPLIES(GetCompilerOptions().IsJitCompiler(), debug_name == nullptr);
7282   if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
7283     std::ostringstream oss;
7284     oss << "BakerReadBarrierThunk";
7285     switch (kind) {
7286       case BakerReadBarrierKind::kField:
7287         oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
7288             << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
7289         break;
7290       case BakerReadBarrierKind::kAcquire:
7291         oss << "Acquire_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
7292             << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
7293         break;
7294       case BakerReadBarrierKind::kArray:
7295         oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
7296         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
7297                   BakerReadBarrierSecondRegField::Decode(encoded_data));
7298         break;
7299       case BakerReadBarrierKind::kGcRoot:
7300         oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
7301         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
7302                   BakerReadBarrierSecondRegField::Decode(encoded_data));
7303         break;
7304     }
7305     *debug_name = oss.str();
7306   }
7307 }
7308 
7309 #undef __
7310 
7311 }  // namespace arm64
7312 }  // namespace art
7313