1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_arm64.h"
18
19 #include "arch/arm64/asm_support_arm64.h"
20 #include "arch/arm64/instruction_set_features_arm64.h"
21 #include "art_method.h"
22 #include "base/bit_utils.h"
23 #include "base/bit_utils_iterator.h"
24 #include "class_table.h"
25 #include "code_generator_utils.h"
26 #include "compiled_method.h"
27 #include "entrypoints/quick/quick_entrypoints.h"
28 #include "entrypoints/quick/quick_entrypoints_enum.h"
29 #include "gc/accounting/card_table.h"
30 #include "gc/space/image_space.h"
31 #include "heap_poisoning.h"
32 #include "intrinsics.h"
33 #include "intrinsics_arm64.h"
34 #include "linker/linker_patch.h"
35 #include "lock_word.h"
36 #include "mirror/array-inl.h"
37 #include "mirror/class-inl.h"
38 #include "offsets.h"
39 #include "thread.h"
40 #include "utils/arm64/assembler_arm64.h"
41 #include "utils/assembler.h"
42 #include "utils/stack_checks.h"
43
44 using namespace vixl::aarch64; // NOLINT(build/namespaces)
45 using vixl::ExactAssemblyScope;
46 using vixl::CodeBufferCheckScope;
47 using vixl::EmissionCheckScope;
48
49 #ifdef __
50 #error "ARM64 Codegen VIXL macro-assembler macro already defined."
51 #endif
52
53 namespace art {
54
55 template<class MirrorType>
56 class GcRoot;
57
58 namespace arm64 {
59
60 using helpers::ARM64EncodableConstantOrRegister;
61 using helpers::ArtVixlRegCodeCoherentForRegSet;
62 using helpers::CPURegisterFrom;
63 using helpers::DRegisterFrom;
64 using helpers::FPRegisterFrom;
65 using helpers::HeapOperand;
66 using helpers::HeapOperandFrom;
67 using helpers::InputCPURegisterOrZeroRegAt;
68 using helpers::InputFPRegisterAt;
69 using helpers::InputOperandAt;
70 using helpers::InputRegisterAt;
71 using helpers::Int64FromLocation;
72 using helpers::IsConstantZeroBitPattern;
73 using helpers::LocationFrom;
74 using helpers::OperandFromMemOperand;
75 using helpers::OutputCPURegister;
76 using helpers::OutputFPRegister;
77 using helpers::OutputRegister;
78 using helpers::QRegisterFrom;
79 using helpers::RegisterFrom;
80 using helpers::StackOperandFrom;
81 using helpers::VIXLRegCodeFromART;
82 using helpers::WRegisterFrom;
83 using helpers::XRegisterFrom;
84
// The compare/jump sequence generates about (1.5 * num_entries + 3) instructions, while the jump
// table version generates 7 instructions and num_entries literals. The compare/jump sequence
// therefore generates less code/data when num_entries is small.
88 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
89
90 // Reference load (except object array loads) is using LDR Wt, [Xn, #offset] which can handle
91 // offset < 16KiB. For offsets >= 16KiB, the load shall be emitted as two or more instructions.
92 // For the Baker read barrier implementation using link-time generated thunks we need to split
93 // the offset explicitly.
94 constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
95
ARM64Condition(IfCondition cond)96 inline Condition ARM64Condition(IfCondition cond) {
97 switch (cond) {
98 case kCondEQ: return eq;
99 case kCondNE: return ne;
100 case kCondLT: return lt;
101 case kCondLE: return le;
102 case kCondGT: return gt;
103 case kCondGE: return ge;
104 case kCondB: return lo;
105 case kCondBE: return ls;
106 case kCondA: return hi;
107 case kCondAE: return hs;
108 }
109 LOG(FATAL) << "Unreachable";
110 UNREACHABLE();
111 }
112
ARM64FPCondition(IfCondition cond,bool gt_bias)113 inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
114 // The ARM64 condition codes can express all the necessary branches, see the
115 // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
116 // There is no dex instruction or HIR that would need the missing conditions
117 // "equal or unordered" or "not equal".
118 switch (cond) {
119 case kCondEQ: return eq;
120 case kCondNE: return ne /* unordered */;
121 case kCondLT: return gt_bias ? cc : lt /* unordered */;
122 case kCondLE: return gt_bias ? ls : le /* unordered */;
123 case kCondGT: return gt_bias ? hi /* unordered */ : gt;
124 case kCondGE: return gt_bias ? cs /* unordered */ : ge;
125 default:
126 LOG(FATAL) << "UNREACHABLE";
127 UNREACHABLE();
128 }
129 }
130
ARM64ReturnLocation(DataType::Type return_type)131 Location ARM64ReturnLocation(DataType::Type return_type) {
132 // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
133 // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
134 // but we use the exact registers for clarity.
135 if (return_type == DataType::Type::kFloat32) {
136 return LocationFrom(s0);
137 } else if (return_type == DataType::Type::kFloat64) {
138 return LocationFrom(d0);
139 } else if (return_type == DataType::Type::kInt64) {
140 return LocationFrom(x0);
141 } else if (return_type == DataType::Type::kVoid) {
142 return Location::NoLocation();
143 } else {
144 return LocationFrom(w0);
145 }
146 }
147
GetReturnLocation(DataType::Type return_type)148 Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) {
149 return ARM64ReturnLocation(return_type);
150 }
151
OneRegInReferenceOutSaveEverythingCallerSaves()152 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
153 InvokeRuntimeCallingConvention calling_convention;
154 RegisterSet caller_saves = RegisterSet::Empty();
155 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
156 DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
157 RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference),
158 DataType::Type::kReference).GetCode());
159 return caller_saves;
160 }
161
162 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
163 #define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT
164 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
165
166 // Calculate memory accessing operand for save/restore live registers.
SaveRestoreLiveRegistersHelper(CodeGenerator * codegen,LocationSummary * locations,int64_t spill_offset,bool is_save)167 static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
168 LocationSummary* locations,
169 int64_t spill_offset,
170 bool is_save) {
171 const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
172 const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
173 DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills,
174 codegen->GetNumberOfCoreRegisters(),
175 fp_spills,
176 codegen->GetNumberOfFloatingPointRegisters()));
177
178 CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
179 unsigned v_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSize : kDRegSize;
180 CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size, fp_spills);
181
182 MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
183 UseScratchRegisterScope temps(masm);
184
185 Register base = masm->StackPointer();
186 int64_t core_spill_size = core_list.GetTotalSizeInBytes();
187 int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
188 int64_t reg_size = kXRegSizeInBytes;
189 int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
190 uint32_t ls_access_size = WhichPowerOf2(reg_size);
191 if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
192 !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
193 // If the offset does not fit in the instruction's immediate field, use an alternate register
194 // to compute the base address(float point registers spill base address).
195 Register new_base = temps.AcquireSameSizeAs(base);
196 __ Add(new_base, base, Operand(spill_offset + core_spill_size));
197 base = new_base;
198 spill_offset = -core_spill_size;
199 int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
200 DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
201 DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
202 }
203
204 if (is_save) {
205 __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
206 __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
207 } else {
208 __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
209 __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
210 }
211 }
212
SaveLiveRegisters(CodeGenerator * codegen,LocationSummary * locations)213 void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
214 size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
215 const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
216 for (uint32_t i : LowToHighBits(core_spills)) {
217 // If the register holds an object, update the stack mask.
218 if (locations->RegisterContainsObject(i)) {
219 locations->SetStackBit(stack_offset / kVRegSize);
220 }
221 DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
222 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
223 saved_core_stack_offsets_[i] = stack_offset;
224 stack_offset += kXRegSizeInBytes;
225 }
226
227 const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
228 for (uint32_t i : LowToHighBits(fp_spills)) {
229 DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
230 DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
231 saved_fpu_stack_offsets_[i] = stack_offset;
232 stack_offset += kDRegSizeInBytes;
233 }
234
235 SaveRestoreLiveRegistersHelper(codegen,
236 locations,
237 codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ true);
238 }
239
RestoreLiveRegisters(CodeGenerator * codegen,LocationSummary * locations)240 void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
241 SaveRestoreLiveRegistersHelper(codegen,
242 locations,
243 codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ false);
244 }
245
246 class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
247 public:
BoundsCheckSlowPathARM64(HBoundsCheck * instruction)248 explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}
249
EmitNativeCode(CodeGenerator * codegen)250 void EmitNativeCode(CodeGenerator* codegen) override {
251 LocationSummary* locations = instruction_->GetLocations();
252 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
253
254 __ Bind(GetEntryLabel());
255 if (instruction_->CanThrowIntoCatchBlock()) {
256 // Live registers will be restored in the catch block if caught.
257 SaveLiveRegisters(codegen, instruction_->GetLocations());
258 }
259 // We're moving two locations to locations that could overlap, so we need a parallel
260 // move resolver.
261 InvokeRuntimeCallingConvention calling_convention;
262 codegen->EmitParallelMoves(locations->InAt(0),
263 LocationFrom(calling_convention.GetRegisterAt(0)),
264 DataType::Type::kInt32,
265 locations->InAt(1),
266 LocationFrom(calling_convention.GetRegisterAt(1)),
267 DataType::Type::kInt32);
268 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
269 ? kQuickThrowStringBounds
270 : kQuickThrowArrayBounds;
271 arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
272 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
273 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
274 }
275
IsFatal() const276 bool IsFatal() const override { return true; }
277
GetDescription() const278 const char* GetDescription() const override { return "BoundsCheckSlowPathARM64"; }
279
280 private:
281 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
282 };
283
284 class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
285 public:
DivZeroCheckSlowPathARM64(HDivZeroCheck * instruction)286 explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}
287
EmitNativeCode(CodeGenerator * codegen)288 void EmitNativeCode(CodeGenerator* codegen) override {
289 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
290 __ Bind(GetEntryLabel());
291 arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
292 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
293 }
294
IsFatal() const295 bool IsFatal() const override { return true; }
296
GetDescription() const297 const char* GetDescription() const override { return "DivZeroCheckSlowPathARM64"; }
298
299 private:
300 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
301 };
302
303 class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
304 public:
LoadClassSlowPathARM64(HLoadClass * cls,HInstruction * at)305 LoadClassSlowPathARM64(HLoadClass* cls, HInstruction* at)
306 : SlowPathCodeARM64(at), cls_(cls) {
307 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
308 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
309 }
310
EmitNativeCode(CodeGenerator * codegen)311 void EmitNativeCode(CodeGenerator* codegen) override {
312 LocationSummary* locations = instruction_->GetLocations();
313 Location out = locations->Out();
314 const uint32_t dex_pc = instruction_->GetDexPc();
315 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
316 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
317
318 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
319 __ Bind(GetEntryLabel());
320 SaveLiveRegisters(codegen, locations);
321
322 InvokeRuntimeCallingConvention calling_convention;
323 if (must_resolve_type) {
324 DCHECK(IsSameDexFile(cls_->GetDexFile(), arm64_codegen->GetGraph()->GetDexFile()));
325 dex::TypeIndex type_index = cls_->GetTypeIndex();
326 __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
327 arm64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
328 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
329 // If we also must_do_clinit, the resolved type is now in the correct register.
330 } else {
331 DCHECK(must_do_clinit);
332 Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
333 arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)),
334 source,
335 cls_->GetType());
336 }
337 if (must_do_clinit) {
338 arm64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
339 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
340 }
341
342 // Move the class to the desired location.
343 if (out.IsValid()) {
344 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
345 DataType::Type type = instruction_->GetType();
346 arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
347 }
348 RestoreLiveRegisters(codegen, locations);
349 __ B(GetExitLabel());
350 }
351
GetDescription() const352 const char* GetDescription() const override { return "LoadClassSlowPathARM64"; }
353
354 private:
355 // The class this slow path will load.
356 HLoadClass* const cls_;
357
358 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
359 };
360
361 class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
362 public:
LoadStringSlowPathARM64(HLoadString * instruction)363 explicit LoadStringSlowPathARM64(HLoadString* instruction)
364 : SlowPathCodeARM64(instruction) {}
365
EmitNativeCode(CodeGenerator * codegen)366 void EmitNativeCode(CodeGenerator* codegen) override {
367 LocationSummary* locations = instruction_->GetLocations();
368 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
369 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
370
371 __ Bind(GetEntryLabel());
372 SaveLiveRegisters(codegen, locations);
373
374 InvokeRuntimeCallingConvention calling_convention;
375 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
376 __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
377 arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
378 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
379 DataType::Type type = instruction_->GetType();
380 arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
381
382 RestoreLiveRegisters(codegen, locations);
383
384 __ B(GetExitLabel());
385 }
386
GetDescription() const387 const char* GetDescription() const override { return "LoadStringSlowPathARM64"; }
388
389 private:
390 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
391 };
392
393 class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
394 public:
NullCheckSlowPathARM64(HNullCheck * instr)395 explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}
396
EmitNativeCode(CodeGenerator * codegen)397 void EmitNativeCode(CodeGenerator* codegen) override {
398 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
399 __ Bind(GetEntryLabel());
400 if (instruction_->CanThrowIntoCatchBlock()) {
401 // Live registers will be restored in the catch block if caught.
402 SaveLiveRegisters(codegen, instruction_->GetLocations());
403 }
404 arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
405 instruction_,
406 instruction_->GetDexPc(),
407 this);
408 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
409 }
410
IsFatal() const411 bool IsFatal() const override { return true; }
412
GetDescription() const413 const char* GetDescription() const override { return "NullCheckSlowPathARM64"; }
414
415 private:
416 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
417 };
418
419 class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
420 public:
SuspendCheckSlowPathARM64(HSuspendCheck * instruction,HBasicBlock * successor)421 SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
422 : SlowPathCodeARM64(instruction), successor_(successor) {}
423
EmitNativeCode(CodeGenerator * codegen)424 void EmitNativeCode(CodeGenerator* codegen) override {
425 LocationSummary* locations = instruction_->GetLocations();
426 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
427 __ Bind(GetEntryLabel());
428 SaveLiveRegisters(codegen, locations); // Only saves live 128-bit regs for SIMD.
429 arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
430 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
431 RestoreLiveRegisters(codegen, locations); // Only restores live 128-bit regs for SIMD.
432 if (successor_ == nullptr) {
433 __ B(GetReturnLabel());
434 } else {
435 __ B(arm64_codegen->GetLabelOf(successor_));
436 }
437 }
438
GetReturnLabel()439 vixl::aarch64::Label* GetReturnLabel() {
440 DCHECK(successor_ == nullptr);
441 return &return_label_;
442 }
443
GetSuccessor() const444 HBasicBlock* GetSuccessor() const {
445 return successor_;
446 }
447
GetDescription() const448 const char* GetDescription() const override { return "SuspendCheckSlowPathARM64"; }
449
450 private:
451 // If not null, the block to branch to after the suspend check.
452 HBasicBlock* const successor_;
453
454 // If `successor_` is null, the label to branch to after the suspend check.
455 vixl::aarch64::Label return_label_;
456
457 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
458 };
459
460 class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
461 public:
TypeCheckSlowPathARM64(HInstruction * instruction,bool is_fatal)462 TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
463 : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}
464
EmitNativeCode(CodeGenerator * codegen)465 void EmitNativeCode(CodeGenerator* codegen) override {
466 LocationSummary* locations = instruction_->GetLocations();
467
468 DCHECK(instruction_->IsCheckCast()
469 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
470 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
471 uint32_t dex_pc = instruction_->GetDexPc();
472
473 __ Bind(GetEntryLabel());
474
475 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
476 SaveLiveRegisters(codegen, locations);
477 }
478
479 // We're moving two locations to locations that could overlap, so we need a parallel
480 // move resolver.
481 InvokeRuntimeCallingConvention calling_convention;
482 codegen->EmitParallelMoves(locations->InAt(0),
483 LocationFrom(calling_convention.GetRegisterAt(0)),
484 DataType::Type::kReference,
485 locations->InAt(1),
486 LocationFrom(calling_convention.GetRegisterAt(1)),
487 DataType::Type::kReference);
488 if (instruction_->IsInstanceOf()) {
489 arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
490 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
491 DataType::Type ret_type = instruction_->GetType();
492 Location ret_loc = calling_convention.GetReturnLocation(ret_type);
493 arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
494 } else {
495 DCHECK(instruction_->IsCheckCast());
496 arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
497 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
498 }
499
500 if (!is_fatal_) {
501 RestoreLiveRegisters(codegen, locations);
502 __ B(GetExitLabel());
503 }
504 }
505
GetDescription() const506 const char* GetDescription() const override { return "TypeCheckSlowPathARM64"; }
IsFatal() const507 bool IsFatal() const override { return is_fatal_; }
508
509 private:
510 const bool is_fatal_;
511
512 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
513 };
514
515 class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
516 public:
DeoptimizationSlowPathARM64(HDeoptimize * instruction)517 explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
518 : SlowPathCodeARM64(instruction) {}
519
EmitNativeCode(CodeGenerator * codegen)520 void EmitNativeCode(CodeGenerator* codegen) override {
521 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
522 __ Bind(GetEntryLabel());
523 LocationSummary* locations = instruction_->GetLocations();
524 SaveLiveRegisters(codegen, locations);
525 InvokeRuntimeCallingConvention calling_convention;
526 __ Mov(calling_convention.GetRegisterAt(0),
527 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
528 arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
529 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
530 }
531
GetDescription() const532 const char* GetDescription() const override { return "DeoptimizationSlowPathARM64"; }
533
534 private:
535 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
536 };
537
538 class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
539 public:
ArraySetSlowPathARM64(HInstruction * instruction)540 explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}
541
EmitNativeCode(CodeGenerator * codegen)542 void EmitNativeCode(CodeGenerator* codegen) override {
543 LocationSummary* locations = instruction_->GetLocations();
544 __ Bind(GetEntryLabel());
545 SaveLiveRegisters(codegen, locations);
546
547 InvokeRuntimeCallingConvention calling_convention;
548 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
549 parallel_move.AddMove(
550 locations->InAt(0),
551 LocationFrom(calling_convention.GetRegisterAt(0)),
552 DataType::Type::kReference,
553 nullptr);
554 parallel_move.AddMove(
555 locations->InAt(1),
556 LocationFrom(calling_convention.GetRegisterAt(1)),
557 DataType::Type::kInt32,
558 nullptr);
559 parallel_move.AddMove(
560 locations->InAt(2),
561 LocationFrom(calling_convention.GetRegisterAt(2)),
562 DataType::Type::kReference,
563 nullptr);
564 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
565
566 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
567 arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
568 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
569 RestoreLiveRegisters(codegen, locations);
570 __ B(GetExitLabel());
571 }
572
GetDescription() const573 const char* GetDescription() const override { return "ArraySetSlowPathARM64"; }
574
575 private:
576 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
577 };
578
EmitTable(CodeGeneratorARM64 * codegen)579 void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
580 uint32_t num_entries = switch_instr_->GetNumEntries();
581 DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
582
583 // We are about to use the assembler to place literals directly. Make sure we have enough
584 // underlying code buffer and we have generated the jump table with right size.
585 EmissionCheckScope scope(codegen->GetVIXLAssembler(),
586 num_entries * sizeof(int32_t),
587 CodeBufferCheckScope::kExactSize);
588
589 __ Bind(&table_start_);
590 const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
591 for (uint32_t i = 0; i < num_entries; i++) {
592 vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
593 DCHECK(target_label->IsBound());
594 ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
595 DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
596 DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
597 Literal<int32_t> literal(jump_offset);
598 __ place(&literal);
599 }
600 }
601
602 // Slow path generating a read barrier for a heap reference.
603 class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
604 public:
ReadBarrierForHeapReferenceSlowPathARM64(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)605 ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
606 Location out,
607 Location ref,
608 Location obj,
609 uint32_t offset,
610 Location index)
611 : SlowPathCodeARM64(instruction),
612 out_(out),
613 ref_(ref),
614 obj_(obj),
615 offset_(offset),
616 index_(index) {
617 DCHECK(kEmitCompilerReadBarrier);
618 // If `obj` is equal to `out` or `ref`, it means the initial object
619 // has been overwritten by (or after) the heap object reference load
620 // to be instrumented, e.g.:
621 //
622 // __ Ldr(out, HeapOperand(out, class_offset);
623 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
624 //
625 // In that case, we have lost the information about the original
626 // object, and the emitted read barrier cannot work properly.
627 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
628 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
629 }
630
EmitNativeCode(CodeGenerator * codegen)631 void EmitNativeCode(CodeGenerator* codegen) override {
632 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
633 LocationSummary* locations = instruction_->GetLocations();
634 DataType::Type type = DataType::Type::kReference;
635 DCHECK(locations->CanCall());
636 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
637 DCHECK(instruction_->IsInstanceFieldGet() ||
638 instruction_->IsStaticFieldGet() ||
639 instruction_->IsArrayGet() ||
640 instruction_->IsInstanceOf() ||
641 instruction_->IsCheckCast() ||
642 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
643 << "Unexpected instruction in read barrier for heap reference slow path: "
644 << instruction_->DebugName();
645 // The read barrier instrumentation of object ArrayGet
646 // instructions does not support the HIntermediateAddress
647 // instruction.
648 DCHECK(!(instruction_->IsArrayGet() &&
649 instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
650
651 __ Bind(GetEntryLabel());
652
653 SaveLiveRegisters(codegen, locations);
654
655 // We may have to change the index's value, but as `index_` is a
656 // constant member (like other "inputs" of this slow path),
657 // introduce a copy of it, `index`.
658 Location index = index_;
659 if (index_.IsValid()) {
660 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
661 if (instruction_->IsArrayGet()) {
662 // Compute the actual memory offset and store it in `index`.
663 Register index_reg = RegisterFrom(index_, DataType::Type::kInt32);
664 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
665 if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
666 // We are about to change the value of `index_reg` (see the
667 // calls to vixl::MacroAssembler::Lsl and
668 // vixl::MacroAssembler::Mov below), but it has
669 // not been saved by the previous call to
670 // art::SlowPathCode::SaveLiveRegisters, as it is a
671 // callee-save register --
672 // art::SlowPathCode::SaveLiveRegisters does not consider
673 // callee-save registers, as it has been designed with the
674 // assumption that callee-save registers are supposed to be
675 // handled by the called function. So, as a callee-save
676 // register, `index_reg` _would_ eventually be saved onto
677 // the stack, but it would be too late: we would have
678 // changed its value earlier. Therefore, we manually save
679 // it here into another freely available register,
680 // `free_reg`, chosen of course among the caller-save
681 // registers (as a callee-save `free_reg` register would
682 // exhibit the same problem).
683 //
684 // Note we could have requested a temporary register from
685 // the register allocator instead; but we prefer not to, as
686 // this is a slow path, and we know we can find a
687 // caller-save register that is available.
688 Register free_reg = FindAvailableCallerSaveRegister(codegen);
689 __ Mov(free_reg.W(), index_reg);
690 index_reg = free_reg;
691 index = LocationFrom(index_reg);
692 } else {
693 // The initial register stored in `index_` has already been
694 // saved in the call to art::SlowPathCode::SaveLiveRegisters
695 // (as it is not a callee-save register), so we can freely
696 // use it.
697 }
698 // Shifting the index value contained in `index_reg` by the scale
699 // factor (2) cannot overflow in practice, as the runtime is
700 // unable to allocate object arrays with a size larger than
701 // 2^26 - 1 (that is, 2^28 - 4 bytes).
702 __ Lsl(index_reg, index_reg, DataType::SizeShift(type));
703 static_assert(
704 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
705 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
706 __ Add(index_reg, index_reg, Operand(offset_));
707 } else {
708 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
709 // intrinsics, `index_` is not shifted by a scale factor of 2
710 // (as in the case of ArrayGet), as it is actually an offset
711 // to an object field within an object.
712 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
713 DCHECK(instruction_->GetLocations()->Intrinsified());
714 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
715 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
716 << instruction_->AsInvoke()->GetIntrinsic();
717 DCHECK_EQ(offset_, 0u);
718 DCHECK(index_.IsRegister());
719 }
720 }
721
722 // We're moving two or three locations to locations that could
723 // overlap, so we need a parallel move resolver.
724 InvokeRuntimeCallingConvention calling_convention;
725 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
726 parallel_move.AddMove(ref_,
727 LocationFrom(calling_convention.GetRegisterAt(0)),
728 type,
729 nullptr);
730 parallel_move.AddMove(obj_,
731 LocationFrom(calling_convention.GetRegisterAt(1)),
732 type,
733 nullptr);
734 if (index.IsValid()) {
735 parallel_move.AddMove(index,
736 LocationFrom(calling_convention.GetRegisterAt(2)),
737 DataType::Type::kInt32,
738 nullptr);
739 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
740 } else {
741 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
742 arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
743 }
744 arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
745 instruction_,
746 instruction_->GetDexPc(),
747 this);
748 CheckEntrypointTypes<
749 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
750 arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
751
752 RestoreLiveRegisters(codegen, locations);
753
754 __ B(GetExitLabel());
755 }
756
GetDescription() const757 const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
758
759 private:
FindAvailableCallerSaveRegister(CodeGenerator * codegen)760 Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
761 size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
762 size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
763 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
764 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
765 return Register(VIXLRegCodeFromART(i), kXRegSize);
766 }
767 }
768 // We shall never fail to find a free caller-save register, as
769 // there are more than two core caller-save registers on ARM64
770 // (meaning it is possible to find one which is different from
771 // `ref` and `obj`).
772 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
773 LOG(FATAL) << "Could not find a free register";
774 UNREACHABLE();
775 }
776
777 const Location out_;
778 const Location ref_;
779 const Location obj_;
780 const uint32_t offset_;
781 // An additional location containing an index to an array.
782 // Only used for HArrayGet and the UnsafeGetObject &
783 // UnsafeGetObjectVolatile intrinsics.
784 const Location index_;
785
786 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
787 };
788
789 // Slow path generating a read barrier for a GC root.
790 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
791 public:
ReadBarrierForRootSlowPathARM64(HInstruction * instruction,Location out,Location root)792 ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
793 : SlowPathCodeARM64(instruction), out_(out), root_(root) {
794 DCHECK(kEmitCompilerReadBarrier);
795 }
796
EmitNativeCode(CodeGenerator * codegen)797 void EmitNativeCode(CodeGenerator* codegen) override {
798 LocationSummary* locations = instruction_->GetLocations();
799 DataType::Type type = DataType::Type::kReference;
800 DCHECK(locations->CanCall());
801 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
802 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
803 << "Unexpected instruction in read barrier for GC root slow path: "
804 << instruction_->DebugName();
805
806 __ Bind(GetEntryLabel());
807 SaveLiveRegisters(codegen, locations);
808
809 InvokeRuntimeCallingConvention calling_convention;
810 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
811 // The argument of the ReadBarrierForRootSlow is not a managed
812 // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
813 // thus we need a 64-bit move here, and we cannot use
814 //
815 // arm64_codegen->MoveLocation(
816 // LocationFrom(calling_convention.GetRegisterAt(0)),
817 // root_,
818 // type);
819 //
820 // which would emit a 32-bit move, as `type` is a (32-bit wide)
821 // reference type (`DataType::Type::kReference`).
822 __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
823 arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
824 instruction_,
825 instruction_->GetDexPc(),
826 this);
827 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
828 arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
829
830 RestoreLiveRegisters(codegen, locations);
831 __ B(GetExitLabel());
832 }
833
GetDescription() const834 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARM64"; }
835
836 private:
837 const Location out_;
838 const Location root_;
839
840 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
841 };
842
843 #undef __
844
GetNextLocation(DataType::Type type)845 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
846 Location next_location;
847 if (type == DataType::Type::kVoid) {
848 LOG(FATAL) << "Unreachable type " << type;
849 }
850
851 if (DataType::IsFloatingPointType(type) &&
852 (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
853 next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
854 } else if (!DataType::IsFloatingPointType(type) &&
855 (gp_index_ < calling_convention.GetNumberOfRegisters())) {
856 next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
857 } else {
858 size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
859 next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
860 : Location::StackSlot(stack_offset);
861 }
862
863 // Space on the stack is reserved for all arguments.
864 stack_index_ += DataType::Is64BitType(type) ? 2 : 1;
865 return next_location;
866 }
867
GetMethodLocation() const868 Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
869 return LocationFrom(kArtMethodRegister);
870 }
871
// Constructs the ARM64 code generator for `graph`.
//
// The allocatable register counts and the callee-save core/FP register lists
// are forwarded to the base CodeGenerator. Every label, literal and patch
// container is backed by the graph's allocator (tagged
// kArenaAllocCodeGenerator), and the assembler is configured with the ARM64
// instruction set features taken from `compiler_options`.
CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
                                       const CompilerOptions& compiler_options,
                                       OptimizingCompilerStats* stats)
    : CodeGenerator(graph,
                    kNumberOfAllocatableRegisters,
                    kNumberOfAllocatableFPRegisters,
                    kNumberOfAllocatableRegisterPairs,
                    callee_saved_core_registers.GetList(),
                    callee_saved_fp_registers.GetList(),
                    compiler_options,
                    stats),
      block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
      move_resolver_(graph->GetAllocator(), this),
      assembler_(graph->GetAllocator(),
                 compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()),
      // Literal maps, keyed by the literal value.
      uint32_literals_(std::less<uint32_t>(),
                       graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      uint64_literals_(std::less<uint64_t>(),
                       graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      // Patch lists.
      boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      // JIT maps, ordered by string/type reference value or by encoded data.
      jit_string_patches_(StringReferenceValueComparator(),
                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_class_patches_(TypeReferenceValueComparator(),
                         graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
                                         graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
  // Save the link register (containing the return address) to mimic Quick.
  AddAllocatedRegister(LocationFrom(lr));
}
911
912 #define __ GetVIXLAssembler()->
913
EmitJumpTables()914 void CodeGeneratorARM64::EmitJumpTables() {
915 for (auto&& jump_table : jump_tables_) {
916 jump_table->EmitTable(this);
917 }
918 }
919
// Completes code generation for the method.
//
// In order: emits the jump tables, emits one Baker read barrier thunk per
// entry of `jit_baker_read_barrier_slow_paths_` (binding each entry's label
// right before its thunk), finalizes the VIXL code (which emits the literal
// pool), and then runs the generic CodeGenerator::Finalize with `allocator`.
//
// In debug builds the final code is additionally read back from `allocator`
// and the instruction next to every Baker read barrier patch location is
// checked against the encoding expected for the patch kind.
void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
  EmitJumpTables();

  // Emit JIT baker read barrier slow paths.
  DCHECK(Runtime::Current()->UseJitCompilation() || jit_baker_read_barrier_slow_paths_.empty());
  for (auto& entry : jit_baker_read_barrier_slow_paths_) {
    // The map key is the encoded thunk data; the mapped value holds the label
    // to bind at the start of the thunk.
    uint32_t encoded_data = entry.first;
    vixl::aarch64::Label* slow_path_entry = &entry.second.label;
    __ Bind(slow_path_entry);
    CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
  }

  // Ensure we emit the literal pool.
  __ FinalizeCode();

  CodeGenerator::Finalize(allocator);

  // Verify Baker read barrier linker patches.
  if (kIsDebugBuild) {
    ArrayRef<const uint8_t> code = allocator->GetMemory();
    for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
      DCHECK(info.label.IsBound());
      uint32_t literal_offset = info.label.GetLocation();
      DCHECK_ALIGNED(literal_offset, 4u);

      // Reads the 32-bit instruction word at byte `offset` of the final code,
      // assembling it from four bytes with the lowest-addressed byte as the
      // least significant one.
      auto GetInsn = [&code](uint32_t offset) {
        DCHECK_ALIGNED(offset, 4u);
        return
            (static_cast<uint32_t>(code[offset + 0]) << 0) +
            (static_cast<uint32_t>(code[offset + 1]) << 8) +
            (static_cast<uint32_t>(code[offset + 2]) << 16)+
            (static_cast<uint32_t>(code[offset + 3]) << 24);
      };

      const uint32_t encoded_data = info.custom_data;
      BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
      // Check that the next instruction matches the expected LDR.
      switch (kind) {
        case BakerReadBarrierKind::kField:
        case BakerReadBarrierKind::kAcquire: {
          // The checked instruction is the one following the patch location,
          // so at least two instructions (8 bytes) must remain.
          DCHECK_GE(code.size() - literal_offset, 8u);
          uint32_t next_insn = GetInsn(literal_offset + 4u);
          CheckValidReg(next_insn & 0x1fu);  // Check destination register.
          const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
          if (kind == BakerReadBarrierKind::kField) {
            // LDR (immediate) with correct base_reg.
            CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5));
          } else {
            DCHECK(kind == BakerReadBarrierKind::kAcquire);
            // LDAR with correct base_reg.
            CHECK_EQ(next_insn & 0xffffffe0u, 0x88dffc00u | (base_reg << 5));
          }
          break;
        }
        case BakerReadBarrierKind::kArray: {
          DCHECK_GE(code.size() - literal_offset, 8u);
          uint32_t next_insn = GetInsn(literal_offset + 4u);
          // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
          // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
          CheckValidReg(next_insn & 0x1fu);  // Check destination register.
          const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
          CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5));
          CheckValidReg((next_insn >> 16) & 0x1f);  // Check index register
          break;
        }
        case BakerReadBarrierKind::kGcRoot: {
          // For GC roots the checked instruction is the one preceding the
          // patch location.
          DCHECK_GE(literal_offset, 4u);
          uint32_t prev_insn = GetInsn(literal_offset - 4u);
          const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
          // Usually LDR (immediate) with correct root_reg but
          // we may have a "MOV marked, old_value" for UnsafeCASObject.
          if ((prev_insn & 0xffe0ffff) != (0x2a0003e0 | root_reg)) {    // MOV?
            CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg);  // LDR?
          }
          break;
        }
        default:
          LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
          UNREACHABLE();
      }
    }
  }
}
1003
PrepareForEmitNativeCode()1004 void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
1005 // Note: There are 6 kinds of moves:
1006 // 1. constant -> GPR/FPR (non-cycle)
1007 // 2. constant -> stack (non-cycle)
1008 // 3. GPR/FPR -> GPR/FPR
1009 // 4. GPR/FPR -> stack
1010 // 5. stack -> GPR/FPR
1011 // 6. stack -> stack (non-cycle)
1012 // Case 1, 2 and 6 should never be included in a dependency cycle on ARM64. For case 3, 4, and 5
1013 // VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no intersecting
1014 // cycles on ARM64, so we always have 1 GPR and 1 FPR available VIXL temps to resolve the
1015 // dependency.
1016 vixl_temps_.Open(GetVIXLAssembler());
1017 }
1018
FinishEmitNativeCode()1019 void ParallelMoveResolverARM64::FinishEmitNativeCode() {
1020 vixl_temps_.Close();
1021 }
1022
AllocateScratchLocationFor(Location::Kind kind)1023 Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
1024 DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister
1025 || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot
1026 || kind == Location::kSIMDStackSlot);
1027 kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot)
1028 ? Location::kFpuRegister
1029 : Location::kRegister;
1030 Location scratch = GetScratchLocation(kind);
1031 if (!scratch.Equals(Location::NoLocation())) {
1032 return scratch;
1033 }
1034 // Allocate from VIXL temp registers.
1035 if (kind == Location::kRegister) {
1036 scratch = LocationFrom(vixl_temps_.AcquireX());
1037 } else {
1038 DCHECK_EQ(kind, Location::kFpuRegister);
1039 scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
1040 ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
1041 : vixl_temps_.AcquireD());
1042 }
1043 AddScratchLocation(scratch);
1044 return scratch;
1045 }
1046
FreeScratchLocation(Location loc)1047 void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
1048 if (loc.IsRegister()) {
1049 vixl_temps_.Release(XRegisterFrom(loc));
1050 } else {
1051 DCHECK(loc.IsFpuRegister());
1052 vixl_temps_.Release(codegen_->GetGraph()->HasSIMD() ? QRegisterFrom(loc) : DRegisterFrom(loc));
1053 }
1054 RemoveScratchLocation(loc);
1055 }
1056
EmitMove(size_t index)1057 void ParallelMoveResolverARM64::EmitMove(size_t index) {
1058 MoveOperands* move = moves_[index];
1059 codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid);
1060 }
1061
// Emits the method prologue at `frame_entry_label_`.
//
// In order: an optional increment of the method's hotness counter, an
// optional stack overflow probe, and -- unless the frame is empty -- the
// frame allocation (storing the current method at sp[0] when it is
// required), the spills of the preserved core and FP registers, and the
// zero-initialization of the should_deoptimize flag when the graph has one.
void CodeGeneratorARM64::GenerateFrameEntry() {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Bind(&frame_entry_label_);

  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
    // Load, increment and store back the 16-bit counter located at
    // ArtMethod::HotnessCountOffset() of the current method.
    UseScratchRegisterScope temps(masm);
    Register temp = temps.AcquireX();
    __ Ldrh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
    __ Add(temp, temp, 1);
    __ Strh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
  }

  bool do_overflow_check =
      FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod();
  if (do_overflow_check) {
    UseScratchRegisterScope temps(masm);
    Register temp = temps.AcquireX();
    DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
    // Probe the address located GetStackOverflowReservedBytes() below sp with
    // a load into wzr (the loaded value is discarded).
    __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kArm64)));
    {
      // Ensure that between load and RecordPcInfo there are no pools emitted.
      ExactAssemblyScope eas(GetVIXLAssembler(),
                             kInstructionSize,
                             CodeBufferCheckScope::kExactSize);
      __ ldr(wzr, MemOperand(temp, 0));
      RecordPcInfo(nullptr, 0);
    }
  }

  if (!HasEmptyFrame()) {
    int frame_size = GetFrameSize();
    // Stack layout:
    //      sp[frame_size - 8]        : lr.
    //      ...                       : other preserved core registers.
    //      ...                       : other preserved fp registers.
    //      ...                       : reserved frame space.
    //      sp[0]                     : current method.

    // Save the current method if we need it. Note that we do not
    // do this in HCurrentMethod, as the instruction might have been removed
    // in the SSA graph.
    if (RequiresCurrentMethod()) {
      // The pre-indexed store allocates the frame and writes the method at
      // the new sp[0] in a single instruction.
      __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
    } else {
      __ Claim(frame_size);
    }
    GetAssembler()->cfi().AdjustCFAOffset(frame_size);
    GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(),
                                   frame_size - GetCoreSpillSize());
    GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(),
                                   frame_size - FrameEntrySpillSize());

    if (GetGraph()->HasShouldDeoptimizeFlag()) {
      // Initialize should_deoptimize flag to 0.
      Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
      __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
    }
  }

  // The code passed here is the source line number of this call.
  MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
1123
GenerateFrameExit()1124 void CodeGeneratorARM64::GenerateFrameExit() {
1125 GetAssembler()->cfi().RememberState();
1126 if (!HasEmptyFrame()) {
1127 int frame_size = GetFrameSize();
1128 GetAssembler()->UnspillRegisters(GetFramePreservedFPRegisters(),
1129 frame_size - FrameEntrySpillSize());
1130 GetAssembler()->UnspillRegisters(GetFramePreservedCoreRegisters(),
1131 frame_size - GetCoreSpillSize());
1132 __ Drop(frame_size);
1133 GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
1134 }
1135 __ Ret();
1136 GetAssembler()->cfi().RestoreState();
1137 GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
1138 }
1139
GetFramePreservedCoreRegisters() const1140 CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
1141 DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
1142 return CPURegList(CPURegister::kRegister, kXRegSize,
1143 core_spill_mask_);
1144 }
1145
GetFramePreservedFPRegisters() const1146 CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
1147 DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
1148 GetNumberOfFloatingPointRegisters()));
1149 return CPURegList(CPURegister::kFPRegister, kDRegSize,
1150 fpu_spill_mask_);
1151 }
1152
Bind(HBasicBlock * block)1153 void CodeGeneratorARM64::Bind(HBasicBlock* block) {
1154 __ Bind(GetLabelOf(block));
1155 }
1156
MoveConstant(Location location,int32_t value)1157 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
1158 DCHECK(location.IsRegister());
1159 __ Mov(RegisterFrom(location, DataType::Type::kInt32), value);
1160 }
1161
AddLocationAsTemp(Location location,LocationSummary * locations)1162 void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1163 if (location.IsRegister()) {
1164 locations->AddTemp(location);
1165 } else {
1166 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1167 }
1168 }
1169
MarkGCCard(Register object,Register value,bool value_can_be_null)1170 void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
1171 UseScratchRegisterScope temps(GetVIXLAssembler());
1172 Register card = temps.AcquireX();
1173 Register temp = temps.AcquireW(); // Index within the CardTable - 32bit.
1174 vixl::aarch64::Label done;
1175 if (value_can_be_null) {
1176 __ Cbz(value, &done);
1177 }
1178 // Load the address of the card table into `card`.
1179 __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
1180 // Calculate the offset (in the card table) of the card corresponding to
1181 // `object`.
1182 __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
1183 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
1184 // `object`'s card.
1185 //
1186 // Register `card` contains the address of the card table. Note that the card
1187 // table's base is biased during its creation so that it always starts at an
1188 // address whose least-significant byte is equal to `kCardDirty` (see
1189 // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
1190 // below writes the `kCardDirty` (byte) value into the `object`'s card
1191 // (located at `card + object >> kCardShift`).
1192 //
1193 // This dual use of the value in register `card` (1. to calculate the location
1194 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
1195 // (no need to explicitly load `kCardDirty` as an immediate value).
1196 __ Strb(card, MemOperand(card, temp.X()));
1197 if (value_can_be_null) {
1198 __ Bind(&done);
1199 }
1200 }
1201
SetupBlockedRegisters() const1202 void CodeGeneratorARM64::SetupBlockedRegisters() const {
1203 // Blocked core registers:
1204 // lr : Runtime reserved.
1205 // tr : Runtime reserved.
1206 // mr : Runtime reserved.
1207 // ip1 : VIXL core temp.
1208 // ip0 : VIXL core temp.
1209 // x18 : Platform register.
1210 //
1211 // Blocked fp registers:
1212 // d31 : VIXL fp temp.
1213 CPURegList reserved_core_registers = vixl_reserved_core_registers;
1214 reserved_core_registers.Combine(runtime_reserved_core_registers);
1215 while (!reserved_core_registers.IsEmpty()) {
1216 blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
1217 }
1218 blocked_core_registers_[X18] = true;
1219
1220 CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
1221 while (!reserved_fp_registers.IsEmpty()) {
1222 blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
1223 }
1224
1225 if (GetGraph()->IsDebuggable()) {
1226 // Stubs do not save callee-save floating point registers. If the graph
1227 // is debuggable, we need to deal with these registers differently. For
1228 // now, just block them.
1229 CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
1230 while (!reserved_fp_registers_debuggable.IsEmpty()) {
1231 blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
1232 }
1233 }
1234 }
1235
SaveCoreRegister(size_t stack_index,uint32_t reg_id)1236 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1237 Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1238 __ Str(reg, MemOperand(sp, stack_index));
1239 return kArm64WordSize;
1240 }
1241
RestoreCoreRegister(size_t stack_index,uint32_t reg_id)1242 size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1243 Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1244 __ Ldr(reg, MemOperand(sp, stack_index));
1245 return kArm64WordSize;
1246 }
1247
SaveFloatingPointRegister(size_t stack_index,uint32_t reg_id)1248 size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1249 FPRegister reg = FPRegister(reg_id, kDRegSize);
1250 __ Str(reg, MemOperand(sp, stack_index));
1251 return kArm64WordSize;
1252 }
1253
RestoreFloatingPointRegister(size_t stack_index,uint32_t reg_id)1254 size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1255 FPRegister reg = FPRegister(reg_id, kDRegSize);
1256 __ Ldr(reg, MemOperand(sp, stack_index));
1257 return kArm64WordSize;
1258 }
1259
DumpCoreRegister(std::ostream & stream,int reg) const1260 void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
1261 stream << XRegister(reg);
1262 }
1263
DumpFloatingPointRegister(std::ostream & stream,int reg) const1264 void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1265 stream << DRegister(reg);
1266 }
1267
GetInstructionSetFeatures() const1268 const Arm64InstructionSetFeatures& CodeGeneratorARM64::GetInstructionSetFeatures() const {
1269 return *GetCompilerOptions().GetInstructionSetFeatures()->AsArm64InstructionSetFeatures();
1270 }
1271
MoveConstant(CPURegister destination,HConstant * constant)1272 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
1273 if (constant->IsIntConstant()) {
1274 __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
1275 } else if (constant->IsLongConstant()) {
1276 __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
1277 } else if (constant->IsNullConstant()) {
1278 __ Mov(Register(destination), 0);
1279 } else if (constant->IsFloatConstant()) {
1280 __ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue());
1281 } else {
1282 DCHECK(constant->IsDoubleConstant());
1283 __ Fmov(FPRegister(destination), constant->AsDoubleConstant()->GetValue());
1284 }
1285 }
1286
1287
CoherentConstantAndType(Location constant,DataType::Type type)1288 static bool CoherentConstantAndType(Location constant, DataType::Type type) {
1289 DCHECK(constant.IsConstant());
1290 HConstant* cst = constant.GetConstant();
1291 return (cst->IsIntConstant() && type == DataType::Type::kInt32) ||
1292 // Null is mapped to a core W register, which we associate with kPrimInt.
1293 (cst->IsNullConstant() && type == DataType::Type::kInt32) ||
1294 (cst->IsLongConstant() && type == DataType::Type::kInt64) ||
1295 (cst->IsFloatConstant() && type == DataType::Type::kFloat32) ||
1296 (cst->IsDoubleConstant() && type == DataType::Type::kFloat64);
1297 }
1298
1299 // Allocate a scratch register from the VIXL pool, querying first
1300 // the floating-point register pool, and then the core register
1301 // pool. This is essentially a reimplementation of
1302 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
1303 // using a different allocation strategy.
AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler * masm,vixl::aarch64::UseScratchRegisterScope * temps,int size_in_bits)1304 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
1305 vixl::aarch64::UseScratchRegisterScope* temps,
1306 int size_in_bits) {
1307 return masm->GetScratchFPRegisterList()->IsEmpty()
1308 ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
1309 : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
1310 }
1311
// Emits the code moving `source` to `destination`.
//
// `dst_type` gives the type of the value being moved. Passing
// `DataType::Type::kVoid` leaves the type unspecified, in which case a
// suitable 32-bit or 64-bit type is inferred from the two locations. Nothing
// is emitted when both locations are equal.
//
// Handled destinations: core/FP registers, SIMD stack slots, and
// (double) stack slots. Stack-to-stack and constant-to-stack moves go
// through a VIXL scratch register (or the zero register for zero-bit-pattern
// constants).
void CodeGeneratorARM64::MoveLocation(Location destination,
                                      Location source,
                                      DataType::Type dst_type) {
  if (source.Equals(destination)) {
    return;
  }

  // A valid move can always be inferred from the destination and source
  // locations. When moving from and to a register, the argument type can be
  // used to generate 32bit instead of 64bit moves. In debug mode we also
  // check the coherency of the locations and the type.
  bool unspecified_type = (dst_type == DataType::Type::kVoid);

  if (destination.IsRegister() || destination.IsFpuRegister()) {
    if (unspecified_type) {
      HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
      if (source.IsStackSlot() ||
          (src_cst != nullptr && (src_cst->IsIntConstant()
                                  || src_cst->IsFloatConstant()
                                  || src_cst->IsNullConstant()))) {
        // For stack slots and 32bit constants, a 32bit type is appropriate.
        dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32;
      } else {
        // If the source is a double stack slot or a 64bit constant, a 64bit
        // type is appropriate. Else the source is a register, and since the
        // type has not been specified, we chose a 64bit type to force a 64bit
        // move.
        dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64;
      }
    }
    DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) ||
           (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type)));
    CPURegister dst = CPURegisterFrom(destination, dst_type);
    if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
      // Stack -> register: the register width must match the slot width.
      DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
      __ Ldr(dst, StackOperandFrom(source));
    } else if (source.IsSIMDStackSlot()) {
      // SIMD stack slot -> register: load the destination as a Q register.
      __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
    } else if (source.IsConstant()) {
      DCHECK(CoherentConstantAndType(source, dst_type));
      MoveConstant(dst, source.GetConstant());
    } else if (source.IsRegister()) {
      if (destination.IsRegister()) {
        __ Mov(Register(dst), RegisterFrom(source, dst_type));
      } else {
        // Core -> FP register: view the source as a core register of the
        // same width as the destination type.
        DCHECK(destination.IsFpuRegister());
        DataType::Type source_type = DataType::Is64BitType(dst_type)
            ? DataType::Type::kInt64
            : DataType::Type::kInt32;
        __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
      }
    } else {
      DCHECK(source.IsFpuRegister());
      if (destination.IsRegister()) {
        // FP -> core register: view the source as an FP register of the
        // same width as the destination type.
        DataType::Type source_type = DataType::Is64BitType(dst_type)
            ? DataType::Type::kFloat64
            : DataType::Type::kFloat32;
        __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
      } else {
        DCHECK(destination.IsFpuRegister());
        if (GetGraph()->HasSIMD()) {
          // Graphs with SIMD move the whole Q register.
          __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
        } else {
          __ Fmov(FPRegister(dst), FPRegisterFrom(source, dst_type));
        }
      }
    }
  } else if (destination.IsSIMDStackSlot()) {
    if (source.IsFpuRegister()) {
      __ Str(QRegisterFrom(source), StackOperandFrom(destination));
    } else {
      DCHECK(source.IsSIMDStackSlot());
      UseScratchRegisterScope temps(GetVIXLAssembler());
      if (GetVIXLAssembler()->GetScratchFPRegisterList()->IsEmpty()) {
        // No FP scratch register is available: copy the slot through an
        // X scratch register in two kArm64WordSize-sized pieces.
        Register temp = temps.AcquireX();
        __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
        __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
        __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
        __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
      } else {
        // Copy the slot in one piece through a Q-sized scratch register.
        FPRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
        __ Ldr(temp, StackOperandFrom(source));
        __ Str(temp, StackOperandFrom(destination));
      }
    }
  } else {  // The destination is not a register. It must be a stack slot.
    DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
    if (source.IsRegister() || source.IsFpuRegister()) {
      if (unspecified_type) {
        // Infer the type from the register kind and the slot width.
        if (source.IsRegister()) {
          dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64;
        } else {
          dst_type =
              destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64;
        }
      }
      DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) &&
             (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type)));
      __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination));
    } else if (source.IsConstant()) {
      DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type))
          << source << " " << dst_type;
      UseScratchRegisterScope temps(GetVIXLAssembler());
      HConstant* src_cst = source.GetConstant();
      CPURegister temp;
      if (src_cst->IsZeroBitPattern()) {
        // Constants whose bit pattern is all zeros are stored straight from
        // the zero register of the matching width; no scratch register is
        // needed.
        temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant())
            ? Register(xzr)
            : Register(wzr);
      } else {
        // Otherwise materialize the constant in a scratch register of the
        // matching kind and width.
        if (src_cst->IsIntConstant()) {
          temp = temps.AcquireW();
        } else if (src_cst->IsLongConstant()) {
          temp = temps.AcquireX();
        } else if (src_cst->IsFloatConstant()) {
          temp = temps.AcquireS();
        } else {
          DCHECK(src_cst->IsDoubleConstant());
          temp = temps.AcquireD();
        }
        MoveConstant(temp, src_cst);
      }
      __ Str(temp, StackOperandFrom(destination));
    } else {
      DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
      DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
      UseScratchRegisterScope temps(GetVIXLAssembler());
      // Use any scratch register (a core or a floating-point one)
      // from VIXL scratch register pools as a temporary.
      //
      // We used to only use the FP scratch register pool, but in some
      // rare cases the only register from this pool (D31) would
      // already be used (e.g. within a ParallelMove instruction, when
      // a move is blocked by a another move requiring a scratch FP
      // register, which would reserve D31). To prevent this issue, we
      // ask for a scratch register of any type (core or FP).
      //
      // Also, we start by asking for a FP scratch register first, as the
      // demand of scratch core registers is higher. This is why we
      // use AcquireFPOrCoreCPURegisterOfSize instead of
      // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
      // allocates core scratch registers first.
      CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
          GetVIXLAssembler(),
          &temps,
          (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
      __ Ldr(temp, StackOperandFrom(source));
      __ Str(temp, StackOperandFrom(destination));
    }
  }
}
1463
Load(DataType::Type type,CPURegister dst,const MemOperand & src)1464 void CodeGeneratorARM64::Load(DataType::Type type,
1465 CPURegister dst,
1466 const MemOperand& src) {
1467 switch (type) {
1468 case DataType::Type::kBool:
1469 case DataType::Type::kUint8:
1470 __ Ldrb(Register(dst), src);
1471 break;
1472 case DataType::Type::kInt8:
1473 __ Ldrsb(Register(dst), src);
1474 break;
1475 case DataType::Type::kUint16:
1476 __ Ldrh(Register(dst), src);
1477 break;
1478 case DataType::Type::kInt16:
1479 __ Ldrsh(Register(dst), src);
1480 break;
1481 case DataType::Type::kInt32:
1482 case DataType::Type::kReference:
1483 case DataType::Type::kInt64:
1484 case DataType::Type::kFloat32:
1485 case DataType::Type::kFloat64:
1486 DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1487 __ Ldr(dst, src);
1488 break;
1489 case DataType::Type::kUint32:
1490 case DataType::Type::kUint64:
1491 case DataType::Type::kVoid:
1492 LOG(FATAL) << "Unreachable type " << type;
1493 }
1494 }
1495
LoadAcquire(HInstruction * instruction,CPURegister dst,const MemOperand & src,bool needs_null_check)1496 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
1497 CPURegister dst,
1498 const MemOperand& src,
1499 bool needs_null_check) {
1500 MacroAssembler* masm = GetVIXLAssembler();
1501 UseScratchRegisterScope temps(masm);
1502 Register temp_base = temps.AcquireX();
1503 DataType::Type type = instruction->GetType();
1504
1505 DCHECK(!src.IsPreIndex());
1506 DCHECK(!src.IsPostIndex());
1507
1508 // TODO(vixl): Let the MacroAssembler handle MemOperand.
1509 __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
1510 {
1511 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
1512 MemOperand base = MemOperand(temp_base);
1513 switch (type) {
1514 case DataType::Type::kBool:
1515 case DataType::Type::kUint8:
1516 case DataType::Type::kInt8:
1517 {
1518 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1519 __ ldarb(Register(dst), base);
1520 if (needs_null_check) {
1521 MaybeRecordImplicitNullCheck(instruction);
1522 }
1523 }
1524 if (type == DataType::Type::kInt8) {
1525 __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1526 }
1527 break;
1528 case DataType::Type::kUint16:
1529 case DataType::Type::kInt16:
1530 {
1531 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1532 __ ldarh(Register(dst), base);
1533 if (needs_null_check) {
1534 MaybeRecordImplicitNullCheck(instruction);
1535 }
1536 }
1537 if (type == DataType::Type::kInt16) {
1538 __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1539 }
1540 break;
1541 case DataType::Type::kInt32:
1542 case DataType::Type::kReference:
1543 case DataType::Type::kInt64:
1544 DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1545 {
1546 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1547 __ ldar(Register(dst), base);
1548 if (needs_null_check) {
1549 MaybeRecordImplicitNullCheck(instruction);
1550 }
1551 }
1552 break;
1553 case DataType::Type::kFloat32:
1554 case DataType::Type::kFloat64: {
1555 DCHECK(dst.IsFPRegister());
1556 DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1557
1558 Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
1559 {
1560 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1561 __ ldar(temp, base);
1562 if (needs_null_check) {
1563 MaybeRecordImplicitNullCheck(instruction);
1564 }
1565 }
1566 __ Fmov(FPRegister(dst), temp);
1567 break;
1568 }
1569 case DataType::Type::kUint32:
1570 case DataType::Type::kUint64:
1571 case DataType::Type::kVoid:
1572 LOG(FATAL) << "Unreachable type " << type;
1573 }
1574 }
1575 }
1576
Store(DataType::Type type,CPURegister src,const MemOperand & dst)1577 void CodeGeneratorARM64::Store(DataType::Type type,
1578 CPURegister src,
1579 const MemOperand& dst) {
1580 switch (type) {
1581 case DataType::Type::kBool:
1582 case DataType::Type::kUint8:
1583 case DataType::Type::kInt8:
1584 __ Strb(Register(src), dst);
1585 break;
1586 case DataType::Type::kUint16:
1587 case DataType::Type::kInt16:
1588 __ Strh(Register(src), dst);
1589 break;
1590 case DataType::Type::kInt32:
1591 case DataType::Type::kReference:
1592 case DataType::Type::kInt64:
1593 case DataType::Type::kFloat32:
1594 case DataType::Type::kFloat64:
1595 DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1596 __ Str(src, dst);
1597 break;
1598 case DataType::Type::kUint32:
1599 case DataType::Type::kUint64:
1600 case DataType::Type::kVoid:
1601 LOG(FATAL) << "Unreachable type " << type;
1602 }
1603 }
1604
StoreRelease(HInstruction * instruction,DataType::Type type,CPURegister src,const MemOperand & dst,bool needs_null_check)1605 void CodeGeneratorARM64::StoreRelease(HInstruction* instruction,
1606 DataType::Type type,
1607 CPURegister src,
1608 const MemOperand& dst,
1609 bool needs_null_check) {
1610 MacroAssembler* masm = GetVIXLAssembler();
1611 UseScratchRegisterScope temps(GetVIXLAssembler());
1612 Register temp_base = temps.AcquireX();
1613
1614 DCHECK(!dst.IsPreIndex());
1615 DCHECK(!dst.IsPostIndex());
1616
1617 // TODO(vixl): Let the MacroAssembler handle this.
1618 Operand op = OperandFromMemOperand(dst);
1619 __ Add(temp_base, dst.GetBaseRegister(), op);
1620 MemOperand base = MemOperand(temp_base);
1621 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
1622 switch (type) {
1623 case DataType::Type::kBool:
1624 case DataType::Type::kUint8:
1625 case DataType::Type::kInt8:
1626 {
1627 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1628 __ stlrb(Register(src), base);
1629 if (needs_null_check) {
1630 MaybeRecordImplicitNullCheck(instruction);
1631 }
1632 }
1633 break;
1634 case DataType::Type::kUint16:
1635 case DataType::Type::kInt16:
1636 {
1637 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1638 __ stlrh(Register(src), base);
1639 if (needs_null_check) {
1640 MaybeRecordImplicitNullCheck(instruction);
1641 }
1642 }
1643 break;
1644 case DataType::Type::kInt32:
1645 case DataType::Type::kReference:
1646 case DataType::Type::kInt64:
1647 DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1648 {
1649 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1650 __ stlr(Register(src), base);
1651 if (needs_null_check) {
1652 MaybeRecordImplicitNullCheck(instruction);
1653 }
1654 }
1655 break;
1656 case DataType::Type::kFloat32:
1657 case DataType::Type::kFloat64: {
1658 DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1659 Register temp_src;
1660 if (src.IsZero()) {
1661 // The zero register is used to avoid synthesizing zero constants.
1662 temp_src = Register(src);
1663 } else {
1664 DCHECK(src.IsFPRegister());
1665 temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
1666 __ Fmov(temp_src, FPRegister(src));
1667 }
1668 {
1669 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1670 __ stlr(temp_src, base);
1671 if (needs_null_check) {
1672 MaybeRecordImplicitNullCheck(instruction);
1673 }
1674 }
1675 break;
1676 }
1677 case DataType::Type::kUint32:
1678 case DataType::Type::kUint64:
1679 case DataType::Type::kVoid:
1680 LOG(FATAL) << "Unreachable type " << type;
1681 }
1682 }
1683
InvokeRuntime(QuickEntrypointEnum entrypoint,HInstruction * instruction,uint32_t dex_pc,SlowPathCode * slow_path)1684 void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1685 HInstruction* instruction,
1686 uint32_t dex_pc,
1687 SlowPathCode* slow_path) {
1688 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1689
1690 __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value()));
1691 {
1692 // Ensure the pc position is recorded immediately after the `blr` instruction.
1693 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
1694 __ blr(lr);
1695 if (EntrypointRequiresStackMap(entrypoint)) {
1696 RecordPcInfo(instruction, dex_pc, slow_path);
1697 }
1698 }
1699 }
1700
InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,HInstruction * instruction,SlowPathCode * slow_path)1701 void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1702 HInstruction* instruction,
1703 SlowPathCode* slow_path) {
1704 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1705 __ Ldr(lr, MemOperand(tr, entry_point_offset));
1706 __ Blr(lr);
1707 }
1708
GenerateClassInitializationCheck(SlowPathCodeARM64 * slow_path,Register class_reg)1709 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
1710 Register class_reg) {
1711 UseScratchRegisterScope temps(GetVIXLAssembler());
1712 Register temp = temps.AcquireW();
1713 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
1714 const size_t status_byte_offset =
1715 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
1716 constexpr uint32_t shifted_initialized_value =
1717 enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
1718
1719 // Even if the initialized flag is set, we need to ensure consistent memory ordering.
1720 // TODO(vixl): Let the MacroAssembler handle MemOperand.
1721 __ Add(temp, class_reg, status_byte_offset);
1722 __ Ldarb(temp, HeapOperand(temp));
1723 __ Cmp(temp, shifted_initialized_value);
1724 __ B(lo, slow_path->GetEntryLabel());
1725 __ Bind(slow_path->GetExitLabel());
1726 }
1727
GenerateBitstringTypeCheckCompare(HTypeCheckInstruction * check,vixl::aarch64::Register temp)1728 void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare(
1729 HTypeCheckInstruction* check, vixl::aarch64::Register temp) {
1730 uint32_t path_to_root = check->GetBitstringPathToRoot();
1731 uint32_t mask = check->GetBitstringMask();
1732 DCHECK(IsPowerOfTwo(mask + 1));
1733 size_t mask_bits = WhichPowerOf2(mask + 1);
1734
1735 if (mask_bits == 16u) {
1736 // Load only the bitstring part of the status word.
1737 __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
1738 } else {
1739 // /* uint32_t */ temp = temp->status_
1740 __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
1741 // Extract the bitstring bits.
1742 __ Ubfx(temp, temp, 0, mask_bits);
1743 }
1744 // Compare the bitstring bits to `path_to_root`.
1745 __ Cmp(temp, path_to_root);
1746 }
1747
GenerateMemoryBarrier(MemBarrierKind kind)1748 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
1749 BarrierType type = BarrierAll;
1750
1751 switch (kind) {
1752 case MemBarrierKind::kAnyAny:
1753 case MemBarrierKind::kAnyStore: {
1754 type = BarrierAll;
1755 break;
1756 }
1757 case MemBarrierKind::kLoadAny: {
1758 type = BarrierReads;
1759 break;
1760 }
1761 case MemBarrierKind::kStoreStore: {
1762 type = BarrierWrites;
1763 break;
1764 }
1765 default:
1766 LOG(FATAL) << "Unexpected memory barrier " << kind;
1767 }
1768 __ Dmb(InnerShareable, type);
1769 }
1770
GenerateSuspendCheck(HSuspendCheck * instruction,HBasicBlock * successor)1771 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
1772 HBasicBlock* successor) {
1773 SuspendCheckSlowPathARM64* slow_path =
1774 down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
1775 if (slow_path == nullptr) {
1776 slow_path =
1777 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARM64(instruction, successor);
1778 instruction->SetSlowPath(slow_path);
1779 codegen_->AddSlowPath(slow_path);
1780 if (successor != nullptr) {
1781 DCHECK(successor->IsLoopHeader());
1782 }
1783 } else {
1784 DCHECK_EQ(slow_path->GetSuccessor(), successor);
1785 }
1786
1787 UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
1788 Register temp = temps.AcquireW();
1789
1790 __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
1791 if (successor == nullptr) {
1792 __ Cbnz(temp, slow_path->GetEntryLabel());
1793 __ Bind(slow_path->GetReturnLabel());
1794 } else {
1795 __ Cbz(temp, codegen_->GetLabelOf(successor));
1796 __ B(slow_path->GetEntryLabel());
1797 // slow_path will return to GetLabelOf(successor).
1798 }
1799 }
1800
InstructionCodeGeneratorARM64(HGraph * graph,CodeGeneratorARM64 * codegen)1801 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
1802 CodeGeneratorARM64* codegen)
1803 : InstructionCodeGenerator(graph, codegen),
1804 assembler_(codegen->GetAssembler()),
1805 codegen_(codegen) {}
1806
HandleBinaryOp(HBinaryOperation * instr)1807 void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
1808 DCHECK_EQ(instr->InputCount(), 2U);
1809 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
1810 DataType::Type type = instr->GetResultType();
1811 switch (type) {
1812 case DataType::Type::kInt32:
1813 case DataType::Type::kInt64:
1814 locations->SetInAt(0, Location::RequiresRegister());
1815 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
1816 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1817 break;
1818
1819 case DataType::Type::kFloat32:
1820 case DataType::Type::kFloat64:
1821 locations->SetInAt(0, Location::RequiresFpuRegister());
1822 locations->SetInAt(1, Location::RequiresFpuRegister());
1823 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
1824 break;
1825
1826 default:
1827 LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type;
1828 }
1829 }
1830
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)1831 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
1832 const FieldInfo& field_info) {
1833 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
1834
1835 bool object_field_get_with_read_barrier =
1836 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
1837 LocationSummary* locations =
1838 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
1839 object_field_get_with_read_barrier
1840 ? LocationSummary::kCallOnSlowPath
1841 : LocationSummary::kNoCall);
1842 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
1843 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
1844 // We need a temporary register for the read barrier load in
1845 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
1846 // only if the field is volatile or the offset is too big.
1847 if (field_info.IsVolatile() ||
1848 field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
1849 locations->AddTemp(FixedTempLocation());
1850 }
1851 }
1852 locations->SetInAt(0, Location::RequiresRegister());
1853 if (DataType::IsFloatingPointType(instruction->GetType())) {
1854 locations->SetOut(Location::RequiresFpuRegister());
1855 } else {
1856 // The output overlaps for an object field get when read barriers
1857 // are enabled: we do not want the load to overwrite the object's
1858 // location, as we need it to emit the read barrier.
1859 locations->SetOut(
1860 Location::RequiresRegister(),
1861 object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
1862 }
1863 }
1864
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)1865 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
1866 const FieldInfo& field_info) {
1867 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
1868 LocationSummary* locations = instruction->GetLocations();
1869 Location base_loc = locations->InAt(0);
1870 Location out = locations->Out();
1871 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
1872 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
1873 DataType::Type load_type = instruction->GetType();
1874 MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
1875
1876 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier &&
1877 load_type == DataType::Type::kReference) {
1878 // Object FieldGet with Baker's read barrier case.
1879 // /* HeapReference<Object> */ out = *(base + offset)
1880 Register base = RegisterFrom(base_loc, DataType::Type::kReference);
1881 Location maybe_temp =
1882 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
1883 // Note that potential implicit null checks are handled in this
1884 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
1885 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1886 instruction,
1887 out,
1888 base,
1889 offset,
1890 maybe_temp,
1891 /* needs_null_check= */ true,
1892 field_info.IsVolatile());
1893 } else {
1894 // General case.
1895 if (field_info.IsVolatile()) {
1896 // Note that a potential implicit null check is handled in this
1897 // CodeGeneratorARM64::LoadAcquire call.
1898 // NB: LoadAcquire will record the pc info if needed.
1899 codegen_->LoadAcquire(
1900 instruction, OutputCPURegister(instruction), field, /* needs_null_check= */ true);
1901 } else {
1902 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
1903 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
1904 codegen_->Load(load_type, OutputCPURegister(instruction), field);
1905 codegen_->MaybeRecordImplicitNullCheck(instruction);
1906 }
1907 if (load_type == DataType::Type::kReference) {
1908 // If read barriers are enabled, emit read barriers other than
1909 // Baker's using a slow path (and also unpoison the loaded
1910 // reference, if heap poisoning is enabled).
1911 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
1912 }
1913 }
1914 }
1915
HandleFieldSet(HInstruction * instruction)1916 void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
1917 LocationSummary* locations =
1918 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
1919 locations->SetInAt(0, Location::RequiresRegister());
1920 if (IsConstantZeroBitPattern(instruction->InputAt(1))) {
1921 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
1922 } else if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
1923 locations->SetInAt(1, Location::RequiresFpuRegister());
1924 } else {
1925 locations->SetInAt(1, Location::RequiresRegister());
1926 }
1927 }
1928
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info,bool value_can_be_null)1929 void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
1930 const FieldInfo& field_info,
1931 bool value_can_be_null) {
1932 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
1933
1934 Register obj = InputRegisterAt(instruction, 0);
1935 CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
1936 CPURegister source = value;
1937 Offset offset = field_info.GetFieldOffset();
1938 DataType::Type field_type = field_info.GetFieldType();
1939
1940 {
1941 // We use a block to end the scratch scope before the write barrier, thus
1942 // freeing the temporary registers so they can be used in `MarkGCCard`.
1943 UseScratchRegisterScope temps(GetVIXLAssembler());
1944
1945 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
1946 DCHECK(value.IsW());
1947 Register temp = temps.AcquireW();
1948 __ Mov(temp, value.W());
1949 GetAssembler()->PoisonHeapReference(temp.W());
1950 source = temp;
1951 }
1952
1953 if (field_info.IsVolatile()) {
1954 codegen_->StoreRelease(
1955 instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check= */ true);
1956 } else {
1957 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
1958 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
1959 codegen_->Store(field_type, source, HeapOperand(obj, offset));
1960 codegen_->MaybeRecordImplicitNullCheck(instruction);
1961 }
1962 }
1963
1964 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
1965 codegen_->MarkGCCard(obj, Register(value), value_can_be_null);
1966 }
1967 }
1968
HandleBinaryOp(HBinaryOperation * instr)1969 void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
1970 DataType::Type type = instr->GetType();
1971
1972 switch (type) {
1973 case DataType::Type::kInt32:
1974 case DataType::Type::kInt64: {
1975 Register dst = OutputRegister(instr);
1976 Register lhs = InputRegisterAt(instr, 0);
1977 Operand rhs = InputOperandAt(instr, 1);
1978 if (instr->IsAdd()) {
1979 __ Add(dst, lhs, rhs);
1980 } else if (instr->IsAnd()) {
1981 __ And(dst, lhs, rhs);
1982 } else if (instr->IsOr()) {
1983 __ Orr(dst, lhs, rhs);
1984 } else if (instr->IsSub()) {
1985 __ Sub(dst, lhs, rhs);
1986 } else if (instr->IsRor()) {
1987 if (rhs.IsImmediate()) {
1988 uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
1989 __ Ror(dst, lhs, shift);
1990 } else {
1991 // Ensure shift distance is in the same size register as the result. If
1992 // we are rotating a long and the shift comes in a w register originally,
1993 // we don't need to sxtw for use as an x since the shift distances are
1994 // all & reg_bits - 1.
1995 __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
1996 }
1997 } else if (instr->IsMin() || instr->IsMax()) {
1998 __ Cmp(lhs, rhs);
1999 __ Csel(dst, lhs, rhs, instr->IsMin() ? lt : gt);
2000 } else {
2001 DCHECK(instr->IsXor());
2002 __ Eor(dst, lhs, rhs);
2003 }
2004 break;
2005 }
2006 case DataType::Type::kFloat32:
2007 case DataType::Type::kFloat64: {
2008 FPRegister dst = OutputFPRegister(instr);
2009 FPRegister lhs = InputFPRegisterAt(instr, 0);
2010 FPRegister rhs = InputFPRegisterAt(instr, 1);
2011 if (instr->IsAdd()) {
2012 __ Fadd(dst, lhs, rhs);
2013 } else if (instr->IsSub()) {
2014 __ Fsub(dst, lhs, rhs);
2015 } else if (instr->IsMin()) {
2016 __ Fmin(dst, lhs, rhs);
2017 } else if (instr->IsMax()) {
2018 __ Fmax(dst, lhs, rhs);
2019 } else {
2020 LOG(FATAL) << "Unexpected floating-point binary operation";
2021 }
2022 break;
2023 }
2024 default:
2025 LOG(FATAL) << "Unexpected binary operation type " << type;
2026 }
2027 }
2028
HandleShift(HBinaryOperation * instr)2029 void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
2030 DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2031
2032 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2033 DataType::Type type = instr->GetResultType();
2034 switch (type) {
2035 case DataType::Type::kInt32:
2036 case DataType::Type::kInt64: {
2037 locations->SetInAt(0, Location::RequiresRegister());
2038 locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
2039 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2040 break;
2041 }
2042 default:
2043 LOG(FATAL) << "Unexpected shift type " << type;
2044 }
2045 }
2046
HandleShift(HBinaryOperation * instr)2047 void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
2048 DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2049
2050 DataType::Type type = instr->GetType();
2051 switch (type) {
2052 case DataType::Type::kInt32:
2053 case DataType::Type::kInt64: {
2054 Register dst = OutputRegister(instr);
2055 Register lhs = InputRegisterAt(instr, 0);
2056 Operand rhs = InputOperandAt(instr, 1);
2057 if (rhs.IsImmediate()) {
2058 uint32_t shift_value = rhs.GetImmediate() &
2059 (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance);
2060 if (instr->IsShl()) {
2061 __ Lsl(dst, lhs, shift_value);
2062 } else if (instr->IsShr()) {
2063 __ Asr(dst, lhs, shift_value);
2064 } else {
2065 __ Lsr(dst, lhs, shift_value);
2066 }
2067 } else {
2068 Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();
2069
2070 if (instr->IsShl()) {
2071 __ Lsl(dst, lhs, rhs_reg);
2072 } else if (instr->IsShr()) {
2073 __ Asr(dst, lhs, rhs_reg);
2074 } else {
2075 __ Lsr(dst, lhs, rhs_reg);
2076 }
2077 }
2078 break;
2079 }
2080 default:
2081 LOG(FATAL) << "Unexpected shift operation type " << type;
2082 }
2083 }
2084
VisitAdd(HAdd * instruction)2085 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
2086 HandleBinaryOp(instruction);
2087 }
2088
VisitAdd(HAdd * instruction)2089 void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) {
2090 HandleBinaryOp(instruction);
2091 }
2092
VisitAnd(HAnd * instruction)2093 void LocationsBuilderARM64::VisitAnd(HAnd* instruction) {
2094 HandleBinaryOp(instruction);
2095 }
2096
VisitAnd(HAnd * instruction)2097 void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) {
2098 HandleBinaryOp(instruction);
2099 }
2100
VisitBitwiseNegatedRight(HBitwiseNegatedRight * instr)2101 void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2102 DCHECK(DataType::IsIntegralType(instr->GetType())) << instr->GetType();
2103 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2104 locations->SetInAt(0, Location::RequiresRegister());
2105 // There is no immediate variant of negated bitwise instructions in AArch64.
2106 locations->SetInAt(1, Location::RequiresRegister());
2107 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2108 }
2109
VisitBitwiseNegatedRight(HBitwiseNegatedRight * instr)2110 void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2111 Register dst = OutputRegister(instr);
2112 Register lhs = InputRegisterAt(instr, 0);
2113 Register rhs = InputRegisterAt(instr, 1);
2114
2115 switch (instr->GetOpKind()) {
2116 case HInstruction::kAnd:
2117 __ Bic(dst, lhs, rhs);
2118 break;
2119 case HInstruction::kOr:
2120 __ Orn(dst, lhs, rhs);
2121 break;
2122 case HInstruction::kXor:
2123 __ Eon(dst, lhs, rhs);
2124 break;
2125 default:
2126 LOG(FATAL) << "Unreachable";
2127 }
2128 }
2129
VisitDataProcWithShifterOp(HDataProcWithShifterOp * instruction)2130 void LocationsBuilderARM64::VisitDataProcWithShifterOp(
2131 HDataProcWithShifterOp* instruction) {
2132 DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
2133 instruction->GetType() == DataType::Type::kInt64);
2134 LocationSummary* locations =
2135 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2136 if (instruction->GetInstrKind() == HInstruction::kNeg) {
2137 locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
2138 } else {
2139 locations->SetInAt(0, Location::RequiresRegister());
2140 }
2141 locations->SetInAt(1, Location::RequiresRegister());
2142 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2143 }
2144
VisitDataProcWithShifterOp(HDataProcWithShifterOp * instruction)2145 void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp(
2146 HDataProcWithShifterOp* instruction) {
2147 DataType::Type type = instruction->GetType();
2148 HInstruction::InstructionKind kind = instruction->GetInstrKind();
2149 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
2150 Register out = OutputRegister(instruction);
2151 Register left;
2152 if (kind != HInstruction::kNeg) {
2153 left = InputRegisterAt(instruction, 0);
2154 }
2155 // If this `HDataProcWithShifterOp` was created by merging a type conversion as the
2156 // shifter operand operation, the IR generating `right_reg` (input to the type
2157 // conversion) can have a different type from the current instruction's type,
2158 // so we manually indicate the type.
2159 Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
2160 Operand right_operand(0);
2161
2162 HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
2163 if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
2164 right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
2165 } else {
2166 right_operand = Operand(right_reg,
2167 helpers::ShiftFromOpKind(op_kind),
2168 instruction->GetShiftAmount());
2169 }
2170
2171 // Logical binary operations do not support extension operations in the
2172 // operand. Note that VIXL would still manage if it was passed by generating
2173 // the extension as a separate instruction.
2174 // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
2175 DCHECK(!right_operand.IsExtendedRegister() ||
2176 (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor &&
2177 kind != HInstruction::kNeg));
2178 switch (kind) {
2179 case HInstruction::kAdd:
2180 __ Add(out, left, right_operand);
2181 break;
2182 case HInstruction::kAnd:
2183 __ And(out, left, right_operand);
2184 break;
2185 case HInstruction::kNeg:
2186 DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero());
2187 __ Neg(out, right_operand);
2188 break;
2189 case HInstruction::kOr:
2190 __ Orr(out, left, right_operand);
2191 break;
2192 case HInstruction::kSub:
2193 __ Sub(out, left, right_operand);
2194 break;
2195 case HInstruction::kXor:
2196 __ Eor(out, left, right_operand);
2197 break;
2198 default:
2199 LOG(FATAL) << "Unexpected operation kind: " << kind;
2200 UNREACHABLE();
2201 }
2202 }
2203
VisitIntermediateAddress(HIntermediateAddress * instruction)2204 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2205 LocationSummary* locations =
2206 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2207 locations->SetInAt(0, Location::RequiresRegister());
2208 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
2209 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2210 }
2211
VisitIntermediateAddress(HIntermediateAddress * instruction)2212 void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2213 __ Add(OutputRegister(instruction),
2214 InputRegisterAt(instruction, 0),
2215 Operand(InputOperandAt(instruction, 1)));
2216 }
2217
VisitIntermediateAddressIndex(HIntermediateAddressIndex * instruction)2218 void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
2219 LocationSummary* locations =
2220 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2221
2222 HIntConstant* shift = instruction->GetShift()->AsIntConstant();
2223
2224 locations->SetInAt(0, Location::RequiresRegister());
2225 // For byte case we don't need to shift the index variable so we can encode the data offset into
2226 // ADD instruction. For other cases we prefer the data_offset to be in register; that will hoist
2227 // data offset constant generation out of the loop and reduce the critical path length in the
2228 // loop.
2229 locations->SetInAt(1, shift->GetValue() == 0
2230 ? Location::ConstantLocation(instruction->GetOffset()->AsIntConstant())
2231 : Location::RequiresRegister());
2232 locations->SetInAt(2, Location::ConstantLocation(shift));
2233 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2234 }
2235
VisitIntermediateAddressIndex(HIntermediateAddressIndex * instruction)2236 void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex(
2237 HIntermediateAddressIndex* instruction) {
2238 Register index_reg = InputRegisterAt(instruction, 0);
2239 uint32_t shift = Int64FromLocation(instruction->GetLocations()->InAt(2));
2240 uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue();
2241
2242 if (shift == 0) {
2243 __ Add(OutputRegister(instruction), index_reg, offset);
2244 } else {
2245 Register offset_reg = InputRegisterAt(instruction, 1);
2246 __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift));
2247 }
2248 }
2249
VisitMultiplyAccumulate(HMultiplyAccumulate * instr)2250 void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2251 LocationSummary* locations =
2252 new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
2253 HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2254 if (instr->GetOpKind() == HInstruction::kSub &&
2255 accumulator->IsConstant() &&
2256 accumulator->AsConstant()->IsArithmeticZero()) {
2257 // Don't allocate register for Mneg instruction.
2258 } else {
2259 locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
2260 Location::RequiresRegister());
2261 }
2262 locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
2263 locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
2264 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2265 }
2266
VisitMultiplyAccumulate(HMultiplyAccumulate * instr)2267 void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2268 Register res = OutputRegister(instr);
2269 Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
2270 Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
2271
2272 // Avoid emitting code that could trigger Cortex A53's erratum 835769.
2273 // This fixup should be carried out for all multiply-accumulate instructions:
2274 // madd, msub, smaddl, smsubl, umaddl and umsubl.
2275 if (instr->GetType() == DataType::Type::kInt64 &&
2276 codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
2277 MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
2278 vixl::aarch64::Instruction* prev =
2279 masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize;
2280 if (prev->IsLoadOrStore()) {
2281 // Make sure we emit only exactly one nop.
2282 ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2283 __ nop();
2284 }
2285 }
2286
2287 if (instr->GetOpKind() == HInstruction::kAdd) {
2288 Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2289 __ Madd(res, mul_left, mul_right, accumulator);
2290 } else {
2291 DCHECK(instr->GetOpKind() == HInstruction::kSub);
2292 HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2293 if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) {
2294 __ Mneg(res, mul_left, mul_right);
2295 } else {
2296 Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2297 __ Msub(res, mul_left, mul_right, accumulator);
2298 }
2299 }
2300 }
2301
VisitArrayGet(HArrayGet * instruction)2302 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
2303 bool object_array_get_with_read_barrier =
2304 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
2305 LocationSummary* locations =
2306 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
2307 object_array_get_with_read_barrier
2308 ? LocationSummary::kCallOnSlowPath
2309 : LocationSummary::kNoCall);
2310 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
2311 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
2312 if (instruction->GetIndex()->IsConstant()) {
2313 // Array loads with constant index are treated as field loads.
2314 // We need a temporary register for the read barrier load in
2315 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
2316 // only if the offset is too big.
2317 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2318 uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
2319 offset += index << DataType::SizeShift(DataType::Type::kReference);
2320 if (offset >= kReferenceLoadMinFarOffset) {
2321 locations->AddTemp(FixedTempLocation());
2322 }
2323 } else if (!instruction->GetArray()->IsIntermediateAddress()) {
2324 // We need a non-scratch temporary for the array data pointer in
2325 // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier() for the case with no
2326 // intermediate address.
2327 locations->AddTemp(Location::RequiresRegister());
2328 }
2329 }
2330 locations->SetInAt(0, Location::RequiresRegister());
2331 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2332 if (DataType::IsFloatingPointType(instruction->GetType())) {
2333 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2334 } else {
2335 // The output overlaps in the case of an object array get with
2336 // read barriers enabled: we do not want the move to overwrite the
2337 // array's location, as we need it to emit the read barrier.
2338 locations->SetOut(
2339 Location::RequiresRegister(),
2340 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2341 }
2342 }
2343
VisitArrayGet(HArrayGet * instruction)2344 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
2345 DataType::Type type = instruction->GetType();
2346 Register obj = InputRegisterAt(instruction, 0);
2347 LocationSummary* locations = instruction->GetLocations();
2348 Location index = locations->InAt(1);
2349 Location out = locations->Out();
2350 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2351 const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
2352 instruction->IsStringCharAt();
2353 MacroAssembler* masm = GetVIXLAssembler();
2354 UseScratchRegisterScope temps(masm);
2355
2356 // The non-Baker read barrier instrumentation of object ArrayGet instructions
2357 // does not support the HIntermediateAddress instruction.
2358 DCHECK(!((type == DataType::Type::kReference) &&
2359 instruction->GetArray()->IsIntermediateAddress() &&
2360 kEmitCompilerReadBarrier &&
2361 !kUseBakerReadBarrier));
2362
2363 if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2364 // Object ArrayGet with Baker's read barrier case.
2365 // Note that a potential implicit null check is handled in the
2366 // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
2367 DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
2368 if (index.IsConstant()) {
2369 DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2370 // Array load with a constant index can be treated as a field load.
2371 offset += Int64FromLocation(index) << DataType::SizeShift(type);
2372 Location maybe_temp =
2373 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2374 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
2375 out,
2376 obj.W(),
2377 offset,
2378 maybe_temp,
2379 /* needs_null_check= */ false,
2380 /* use_load_acquire= */ false);
2381 } else {
2382 codegen_->GenerateArrayLoadWithBakerReadBarrier(
2383 instruction, out, obj.W(), offset, index, /* needs_null_check= */ false);
2384 }
2385 } else {
2386 // General case.
2387 MemOperand source = HeapOperand(obj);
2388 Register length;
2389 if (maybe_compressed_char_at) {
2390 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2391 length = temps.AcquireW();
2392 {
2393 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2394 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2395
2396 if (instruction->GetArray()->IsIntermediateAddress()) {
2397 DCHECK_LT(count_offset, offset);
2398 int64_t adjusted_offset =
2399 static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
2400 // Note that `adjusted_offset` is negative, so this will be a LDUR.
2401 __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
2402 } else {
2403 __ Ldr(length, HeapOperand(obj, count_offset));
2404 }
2405 codegen_->MaybeRecordImplicitNullCheck(instruction);
2406 }
2407 }
2408 if (index.IsConstant()) {
2409 if (maybe_compressed_char_at) {
2410 vixl::aarch64::Label uncompressed_load, done;
2411 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2412 "Expecting 0=compressed, 1=uncompressed");
2413 __ Tbnz(length.W(), 0, &uncompressed_load);
2414 __ Ldrb(Register(OutputCPURegister(instruction)),
2415 HeapOperand(obj, offset + Int64FromLocation(index)));
2416 __ B(&done);
2417 __ Bind(&uncompressed_load);
2418 __ Ldrh(Register(OutputCPURegister(instruction)),
2419 HeapOperand(obj, offset + (Int64FromLocation(index) << 1)));
2420 __ Bind(&done);
2421 } else {
2422 offset += Int64FromLocation(index) << DataType::SizeShift(type);
2423 source = HeapOperand(obj, offset);
2424 }
2425 } else {
2426 Register temp = temps.AcquireSameSizeAs(obj);
2427 if (instruction->GetArray()->IsIntermediateAddress()) {
2428 // We do not need to compute the intermediate address from the array: the
2429 // input instruction has done it already. See the comment in
2430 // `TryExtractArrayAccessAddress()`.
2431 if (kIsDebugBuild) {
2432 HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
2433 DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
2434 }
2435 temp = obj;
2436 } else {
2437 __ Add(temp, obj, offset);
2438 }
2439 if (maybe_compressed_char_at) {
2440 vixl::aarch64::Label uncompressed_load, done;
2441 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2442 "Expecting 0=compressed, 1=uncompressed");
2443 __ Tbnz(length.W(), 0, &uncompressed_load);
2444 __ Ldrb(Register(OutputCPURegister(instruction)),
2445 HeapOperand(temp, XRegisterFrom(index), LSL, 0));
2446 __ B(&done);
2447 __ Bind(&uncompressed_load);
2448 __ Ldrh(Register(OutputCPURegister(instruction)),
2449 HeapOperand(temp, XRegisterFrom(index), LSL, 1));
2450 __ Bind(&done);
2451 } else {
2452 source = HeapOperand(temp, XRegisterFrom(index), LSL, DataType::SizeShift(type));
2453 }
2454 }
2455 if (!maybe_compressed_char_at) {
2456 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2457 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2458 codegen_->Load(type, OutputCPURegister(instruction), source);
2459 codegen_->MaybeRecordImplicitNullCheck(instruction);
2460 }
2461
2462 if (type == DataType::Type::kReference) {
2463 static_assert(
2464 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
2465 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
2466 Location obj_loc = locations->InAt(0);
2467 if (index.IsConstant()) {
2468 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
2469 } else {
2470 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
2471 }
2472 }
2473 }
2474 }
2475
VisitArrayLength(HArrayLength * instruction)2476 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
2477 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
2478 locations->SetInAt(0, Location::RequiresRegister());
2479 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2480 }
2481
VisitArrayLength(HArrayLength * instruction)2482 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
2483 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
2484 vixl::aarch64::Register out = OutputRegister(instruction);
2485 {
2486 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2487 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2488 __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
2489 codegen_->MaybeRecordImplicitNullCheck(instruction);
2490 }
2491 // Mask out compression flag from String's array length.
2492 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
2493 __ Lsr(out.W(), out.W(), 1u);
2494 }
2495 }
2496
VisitArraySet(HArraySet * instruction)2497 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
2498 DataType::Type value_type = instruction->GetComponentType();
2499
2500 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
2501 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
2502 instruction,
2503 may_need_runtime_call_for_type_check ?
2504 LocationSummary::kCallOnSlowPath :
2505 LocationSummary::kNoCall);
2506 locations->SetInAt(0, Location::RequiresRegister());
2507 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2508 if (IsConstantZeroBitPattern(instruction->InputAt(2))) {
2509 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
2510 } else if (DataType::IsFloatingPointType(value_type)) {
2511 locations->SetInAt(2, Location::RequiresFpuRegister());
2512 } else {
2513 locations->SetInAt(2, Location::RequiresRegister());
2514 }
2515 }
2516
VisitArraySet(HArraySet * instruction)2517 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
2518 DataType::Type value_type = instruction->GetComponentType();
2519 LocationSummary* locations = instruction->GetLocations();
2520 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
2521 bool needs_write_barrier =
2522 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
2523
2524 Register array = InputRegisterAt(instruction, 0);
2525 CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2);
2526 CPURegister source = value;
2527 Location index = locations->InAt(1);
2528 size_t offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
2529 MemOperand destination = HeapOperand(array);
2530 MacroAssembler* masm = GetVIXLAssembler();
2531
2532 if (!needs_write_barrier) {
2533 DCHECK(!may_need_runtime_call_for_type_check);
2534 if (index.IsConstant()) {
2535 offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
2536 destination = HeapOperand(array, offset);
2537 } else {
2538 UseScratchRegisterScope temps(masm);
2539 Register temp = temps.AcquireSameSizeAs(array);
2540 if (instruction->GetArray()->IsIntermediateAddress()) {
2541 // We do not need to compute the intermediate address from the array: the
2542 // input instruction has done it already. See the comment in
2543 // `TryExtractArrayAccessAddress()`.
2544 if (kIsDebugBuild) {
2545 HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
2546 DCHECK(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
2547 }
2548 temp = array;
2549 } else {
2550 __ Add(temp, array, offset);
2551 }
2552 destination = HeapOperand(temp,
2553 XRegisterFrom(index),
2554 LSL,
2555 DataType::SizeShift(value_type));
2556 }
2557 {
2558 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2559 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2560 codegen_->Store(value_type, value, destination);
2561 codegen_->MaybeRecordImplicitNullCheck(instruction);
2562 }
2563 } else {
2564 DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2565 vixl::aarch64::Label done;
2566 SlowPathCodeARM64* slow_path = nullptr;
2567 {
2568 // We use a block to end the scratch scope before the write barrier, thus
2569 // freeing the temporary registers so they can be used in `MarkGCCard`.
2570 UseScratchRegisterScope temps(masm);
2571 Register temp = temps.AcquireSameSizeAs(array);
2572 if (index.IsConstant()) {
2573 offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
2574 destination = HeapOperand(array, offset);
2575 } else {
2576 destination = HeapOperand(temp,
2577 XRegisterFrom(index),
2578 LSL,
2579 DataType::SizeShift(value_type));
2580 }
2581
2582 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2583 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2584 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2585
2586 if (may_need_runtime_call_for_type_check) {
2587 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction);
2588 codegen_->AddSlowPath(slow_path);
2589 if (instruction->GetValueCanBeNull()) {
2590 vixl::aarch64::Label non_zero;
2591 __ Cbnz(Register(value), &non_zero);
2592 if (!index.IsConstant()) {
2593 __ Add(temp, array, offset);
2594 }
2595 {
2596 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools
2597 // emitted.
2598 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2599 __ Str(wzr, destination);
2600 codegen_->MaybeRecordImplicitNullCheck(instruction);
2601 }
2602 __ B(&done);
2603 __ Bind(&non_zero);
2604 }
2605
2606 // Note that when Baker read barriers are enabled, the type
2607 // checks are performed without read barriers. This is fine,
2608 // even in the case where a class object is in the from-space
2609 // after the flip, as a comparison involving such a type would
2610 // not produce a false positive; it may of course produce a
2611 // false negative, in which case we would take the ArraySet
2612 // slow path.
2613
2614 Register temp2 = temps.AcquireSameSizeAs(array);
2615 // /* HeapReference<Class> */ temp = array->klass_
2616 {
2617 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2618 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2619 __ Ldr(temp, HeapOperand(array, class_offset));
2620 codegen_->MaybeRecordImplicitNullCheck(instruction);
2621 }
2622 GetAssembler()->MaybeUnpoisonHeapReference(temp);
2623
2624 // /* HeapReference<Class> */ temp = temp->component_type_
2625 __ Ldr(temp, HeapOperand(temp, component_offset));
2626 // /* HeapReference<Class> */ temp2 = value->klass_
2627 __ Ldr(temp2, HeapOperand(Register(value), class_offset));
2628 // If heap poisoning is enabled, no need to unpoison `temp`
2629 // nor `temp2`, as we are comparing two poisoned references.
2630 __ Cmp(temp, temp2);
2631 temps.Release(temp2);
2632
2633 if (instruction->StaticTypeOfArrayIsObjectArray()) {
2634 vixl::aarch64::Label do_put;
2635 __ B(eq, &do_put);
2636 // If heap poisoning is enabled, the `temp` reference has
2637 // not been unpoisoned yet; unpoison it now.
2638 GetAssembler()->MaybeUnpoisonHeapReference(temp);
2639
2640 // /* HeapReference<Class> */ temp = temp->super_class_
2641 __ Ldr(temp, HeapOperand(temp, super_offset));
2642 // If heap poisoning is enabled, no need to unpoison
2643 // `temp`, as we are comparing against null below.
2644 __ Cbnz(temp, slow_path->GetEntryLabel());
2645 __ Bind(&do_put);
2646 } else {
2647 __ B(ne, slow_path->GetEntryLabel());
2648 }
2649 }
2650
2651 if (kPoisonHeapReferences) {
2652 Register temp2 = temps.AcquireSameSizeAs(array);
2653 DCHECK(value.IsW());
2654 __ Mov(temp2, value.W());
2655 GetAssembler()->PoisonHeapReference(temp2);
2656 source = temp2;
2657 }
2658
2659 if (!index.IsConstant()) {
2660 __ Add(temp, array, offset);
2661 } else {
2662 // We no longer need the `temp` here so release it as the store below may
2663 // need a scratch register (if the constant index makes the offset too large)
2664 // and the poisoned `source` could be using the other scratch register.
2665 temps.Release(temp);
2666 }
2667 {
2668 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2669 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2670 __ Str(source, destination);
2671
2672 if (!may_need_runtime_call_for_type_check) {
2673 codegen_->MaybeRecordImplicitNullCheck(instruction);
2674 }
2675 }
2676 }
2677
2678 codegen_->MarkGCCard(array, value.W(), instruction->GetValueCanBeNull());
2679
2680 if (done.IsLinked()) {
2681 __ Bind(&done);
2682 }
2683
2684 if (slow_path != nullptr) {
2685 __ Bind(slow_path->GetExitLabel());
2686 }
2687 }
2688 }
2689
VisitBoundsCheck(HBoundsCheck * instruction)2690 void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
2691 RegisterSet caller_saves = RegisterSet::Empty();
2692 InvokeRuntimeCallingConvention calling_convention;
2693 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
2694 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode()));
2695 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
2696 locations->SetInAt(0, Location::RequiresRegister());
2697 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
2698 }
2699
VisitBoundsCheck(HBoundsCheck * instruction)2700 void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
2701 BoundsCheckSlowPathARM64* slow_path =
2702 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction);
2703 codegen_->AddSlowPath(slow_path);
2704 __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
2705 __ B(slow_path->GetEntryLabel(), hs);
2706 }
2707
VisitClinitCheck(HClinitCheck * check)2708 void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
2709 LocationSummary* locations =
2710 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
2711 locations->SetInAt(0, Location::RequiresRegister());
2712 if (check->HasUses()) {
2713 locations->SetOut(Location::SameAsFirstInput());
2714 }
2715 // Rely on the type initialization to save everything we need.
2716 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
2717 }
2718
VisitClinitCheck(HClinitCheck * check)2719 void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
2720 // We assume the class is not null.
2721 SlowPathCodeARM64* slow_path =
2722 new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(check->GetLoadClass(), check);
2723 codegen_->AddSlowPath(slow_path);
2724 GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
2725 }
2726
IsFloatingPointZeroConstant(HInstruction * inst)2727 static bool IsFloatingPointZeroConstant(HInstruction* inst) {
2728 return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero()))
2729 || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero()));
2730 }
2731
GenerateFcmp(HInstruction * instruction)2732 void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) {
2733 FPRegister lhs_reg = InputFPRegisterAt(instruction, 0);
2734 Location rhs_loc = instruction->GetLocations()->InAt(1);
2735 if (rhs_loc.IsConstant()) {
2736 // 0.0 is the only immediate that can be encoded directly in
2737 // an FCMP instruction.
2738 //
2739 // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
2740 // specify that in a floating-point comparison, positive zero
2741 // and negative zero are considered equal, so we can use the
2742 // literal 0.0 for both cases here.
2743 //
2744 // Note however that some methods (Float.equal, Float.compare,
2745 // Float.compareTo, Double.equal, Double.compare,
2746 // Double.compareTo, Math.max, Math.min, StrictMath.max,
2747 // StrictMath.min) consider 0.0 to be (strictly) greater than
2748 // -0.0. So if we ever translate calls to these methods into a
2749 // HCompare instruction, we must handle the -0.0 case with
2750 // care here.
2751 DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant()));
2752 __ Fcmp(lhs_reg, 0.0);
2753 } else {
2754 __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1));
2755 }
2756 }
2757
VisitCompare(HCompare * compare)2758 void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
2759 LocationSummary* locations =
2760 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2761 DataType::Type in_type = compare->InputAt(0)->GetType();
2762 switch (in_type) {
2763 case DataType::Type::kBool:
2764 case DataType::Type::kUint8:
2765 case DataType::Type::kInt8:
2766 case DataType::Type::kUint16:
2767 case DataType::Type::kInt16:
2768 case DataType::Type::kInt32:
2769 case DataType::Type::kInt64: {
2770 locations->SetInAt(0, Location::RequiresRegister());
2771 locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
2772 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2773 break;
2774 }
2775 case DataType::Type::kFloat32:
2776 case DataType::Type::kFloat64: {
2777 locations->SetInAt(0, Location::RequiresFpuRegister());
2778 locations->SetInAt(1,
2779 IsFloatingPointZeroConstant(compare->InputAt(1))
2780 ? Location::ConstantLocation(compare->InputAt(1)->AsConstant())
2781 : Location::RequiresFpuRegister());
2782 locations->SetOut(Location::RequiresRegister());
2783 break;
2784 }
2785 default:
2786 LOG(FATAL) << "Unexpected type for compare operation " << in_type;
2787 }
2788 }
2789
VisitCompare(HCompare * compare)2790 void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
2791 DataType::Type in_type = compare->InputAt(0)->GetType();
2792
2793 // 0 if: left == right
2794 // 1 if: left > right
2795 // -1 if: left < right
2796 switch (in_type) {
2797 case DataType::Type::kBool:
2798 case DataType::Type::kUint8:
2799 case DataType::Type::kInt8:
2800 case DataType::Type::kUint16:
2801 case DataType::Type::kInt16:
2802 case DataType::Type::kInt32:
2803 case DataType::Type::kInt64: {
2804 Register result = OutputRegister(compare);
2805 Register left = InputRegisterAt(compare, 0);
2806 Operand right = InputOperandAt(compare, 1);
2807 __ Cmp(left, right);
2808 __ Cset(result, ne); // result == +1 if NE or 0 otherwise
2809 __ Cneg(result, result, lt); // result == -1 if LT or unchanged otherwise
2810 break;
2811 }
2812 case DataType::Type::kFloat32:
2813 case DataType::Type::kFloat64: {
2814 Register result = OutputRegister(compare);
2815 GenerateFcmp(compare);
2816 __ Cset(result, ne);
2817 __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
2818 break;
2819 }
2820 default:
2821 LOG(FATAL) << "Unimplemented compare type " << in_type;
2822 }
2823 }
2824
HandleCondition(HCondition * instruction)2825 void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
2826 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
2827
2828 if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
2829 locations->SetInAt(0, Location::RequiresFpuRegister());
2830 locations->SetInAt(1,
2831 IsFloatingPointZeroConstant(instruction->InputAt(1))
2832 ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant())
2833 : Location::RequiresFpuRegister());
2834 } else {
2835 // Integer cases.
2836 locations->SetInAt(0, Location::RequiresRegister());
2837 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
2838 }
2839
2840 if (!instruction->IsEmittedAtUseSite()) {
2841 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2842 }
2843 }
2844
HandleCondition(HCondition * instruction)2845 void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
2846 if (instruction->IsEmittedAtUseSite()) {
2847 return;
2848 }
2849
2850 LocationSummary* locations = instruction->GetLocations();
2851 Register res = RegisterFrom(locations->Out(), instruction->GetType());
2852 IfCondition if_cond = instruction->GetCondition();
2853
2854 if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
2855 GenerateFcmp(instruction);
2856 __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
2857 } else {
2858 // Integer cases.
2859 Register lhs = InputRegisterAt(instruction, 0);
2860 Operand rhs = InputOperandAt(instruction, 1);
2861 __ Cmp(lhs, rhs);
2862 __ Cset(res, ARM64Condition(if_cond));
2863 }
2864 }
2865
2866 #define FOR_EACH_CONDITION_INSTRUCTION(M) \
2867 M(Equal) \
2868 M(NotEqual) \
2869 M(LessThan) \
2870 M(LessThanOrEqual) \
2871 M(GreaterThan) \
2872 M(GreaterThanOrEqual) \
2873 M(Below) \
2874 M(BelowOrEqual) \
2875 M(Above) \
2876 M(AboveOrEqual)
2877 #define DEFINE_CONDITION_VISITORS(Name) \
2878 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); } \
2879 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)2880 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
2881 #undef DEFINE_CONDITION_VISITORS
2882 #undef FOR_EACH_CONDITION_INSTRUCTION
2883
2884 void InstructionCodeGeneratorARM64::GenerateIntDivForPower2Denom(HDiv* instruction) {
2885 int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
2886 uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
2887 DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;
2888
2889 Register out = OutputRegister(instruction);
2890 Register dividend = InputRegisterAt(instruction, 0);
2891
2892 if (abs_imm == 2) {
2893 int bits = DataType::Size(instruction->GetResultType()) * kBitsPerByte;
2894 __ Add(out, dividend, Operand(dividend, LSR, bits - 1));
2895 } else {
2896 UseScratchRegisterScope temps(GetVIXLAssembler());
2897 Register temp = temps.AcquireSameSizeAs(out);
2898 __ Add(temp, dividend, abs_imm - 1);
2899 __ Cmp(dividend, 0);
2900 __ Csel(out, temp, dividend, lt);
2901 }
2902
2903 int ctz_imm = CTZ(abs_imm);
2904 if (imm > 0) {
2905 __ Asr(out, out, ctz_imm);
2906 } else {
2907 __ Neg(out, Operand(out, ASR, ctz_imm));
2908 }
2909 }
2910
GenerateDivRemWithAnyConstant(HBinaryOperation * instruction)2911 void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
2912 DCHECK(instruction->IsDiv() || instruction->IsRem());
2913
2914 LocationSummary* locations = instruction->GetLocations();
2915 Location second = locations->InAt(1);
2916 DCHECK(second.IsConstant());
2917
2918 Register out = OutputRegister(instruction);
2919 Register dividend = InputRegisterAt(instruction, 0);
2920 int64_t imm = Int64FromConstant(second.GetConstant());
2921
2922 DataType::Type type = instruction->GetResultType();
2923 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
2924
2925 int64_t magic;
2926 int shift;
2927 CalculateMagicAndShiftForDivRem(
2928 imm, /* is_long= */ type == DataType::Type::kInt64, &magic, &shift);
2929
2930 UseScratchRegisterScope temps(GetVIXLAssembler());
2931 Register temp = temps.AcquireSameSizeAs(out);
2932
2933 // temp = get_high(dividend * magic)
2934 __ Mov(temp, magic);
2935 if (type == DataType::Type::kInt64) {
2936 __ Smulh(temp, dividend, temp);
2937 } else {
2938 __ Smull(temp.X(), dividend, temp);
2939 __ Lsr(temp.X(), temp.X(), 32);
2940 }
2941
2942 if (imm > 0 && magic < 0) {
2943 __ Add(temp, temp, dividend);
2944 } else if (imm < 0 && magic > 0) {
2945 __ Sub(temp, temp, dividend);
2946 }
2947
2948 if (shift != 0) {
2949 __ Asr(temp, temp, shift);
2950 }
2951
2952 if (instruction->IsDiv()) {
2953 __ Sub(out, temp, Operand(temp, ASR, type == DataType::Type::kInt64 ? 63 : 31));
2954 } else {
2955 __ Sub(temp, temp, Operand(temp, ASR, type == DataType::Type::kInt64 ? 63 : 31));
2956 // TODO: Strength reduction for msub.
2957 Register temp_imm = temps.AcquireSameSizeAs(out);
2958 __ Mov(temp_imm, imm);
2959 __ Msub(out, temp, temp_imm, dividend);
2960 }
2961 }
2962
GenerateIntDivForConstDenom(HDiv * instruction)2963 void InstructionCodeGeneratorARM64::GenerateIntDivForConstDenom(HDiv *instruction) {
2964 int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
2965
2966 if (imm == 0) {
2967 // Do not generate anything. DivZeroCheck would prevent any code to be executed.
2968 return;
2969 }
2970
2971 if (IsPowerOfTwo(AbsOrMin(imm))) {
2972 GenerateIntDivForPower2Denom(instruction);
2973 } else {
2974 // Cases imm == -1 or imm == 1 are handled by InstructionSimplifier.
2975 DCHECK(imm < -2 || imm > 2) << imm;
2976 GenerateDivRemWithAnyConstant(instruction);
2977 }
2978 }
2979
GenerateIntDiv(HDiv * instruction)2980 void InstructionCodeGeneratorARM64::GenerateIntDiv(HDiv *instruction) {
2981 DCHECK(DataType::IsIntOrLongType(instruction->GetResultType()))
2982 << instruction->GetResultType();
2983
2984 if (instruction->GetLocations()->InAt(1).IsConstant()) {
2985 GenerateIntDivForConstDenom(instruction);
2986 } else {
2987 Register out = OutputRegister(instruction);
2988 Register dividend = InputRegisterAt(instruction, 0);
2989 Register divisor = InputRegisterAt(instruction, 1);
2990 __ Sdiv(out, dividend, divisor);
2991 }
2992 }
2993
VisitDiv(HDiv * div)2994 void LocationsBuilderARM64::VisitDiv(HDiv* div) {
2995 LocationSummary* locations =
2996 new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
2997 switch (div->GetResultType()) {
2998 case DataType::Type::kInt32:
2999 case DataType::Type::kInt64:
3000 locations->SetInAt(0, Location::RequiresRegister());
3001 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3002 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3003 break;
3004
3005 case DataType::Type::kFloat32:
3006 case DataType::Type::kFloat64:
3007 locations->SetInAt(0, Location::RequiresFpuRegister());
3008 locations->SetInAt(1, Location::RequiresFpuRegister());
3009 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3010 break;
3011
3012 default:
3013 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3014 }
3015 }
3016
VisitDiv(HDiv * div)3017 void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
3018 DataType::Type type = div->GetResultType();
3019 switch (type) {
3020 case DataType::Type::kInt32:
3021 case DataType::Type::kInt64:
3022 GenerateIntDiv(div);
3023 break;
3024
3025 case DataType::Type::kFloat32:
3026 case DataType::Type::kFloat64:
3027 __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1));
3028 break;
3029
3030 default:
3031 LOG(FATAL) << "Unexpected div type " << type;
3032 }
3033 }
3034
VisitDivZeroCheck(HDivZeroCheck * instruction)3035 void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3036 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3037 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
3038 }
3039
VisitDivZeroCheck(HDivZeroCheck * instruction)3040 void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3041 SlowPathCodeARM64* slow_path =
3042 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARM64(instruction);
3043 codegen_->AddSlowPath(slow_path);
3044 Location value = instruction->GetLocations()->InAt(0);
3045
3046 DataType::Type type = instruction->GetType();
3047
3048 if (!DataType::IsIntegralType(type)) {
3049 LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
3050 UNREACHABLE();
3051 }
3052
3053 if (value.IsConstant()) {
3054 int64_t divisor = Int64FromLocation(value);
3055 if (divisor == 0) {
3056 __ B(slow_path->GetEntryLabel());
3057 } else {
3058 // A division by a non-null constant is valid. We don't need to perform
3059 // any check, so simply fall through.
3060 }
3061 } else {
3062 __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
3063 }
3064 }
3065
VisitDoubleConstant(HDoubleConstant * constant)3066 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
3067 LocationSummary* locations =
3068 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3069 locations->SetOut(Location::ConstantLocation(constant));
3070 }
3071
VisitDoubleConstant(HDoubleConstant * constant ATTRIBUTE_UNUSED)3072 void InstructionCodeGeneratorARM64::VisitDoubleConstant(
3073 HDoubleConstant* constant ATTRIBUTE_UNUSED) {
3074 // Will be generated at use site.
3075 }
3076
VisitExit(HExit * exit)3077 void LocationsBuilderARM64::VisitExit(HExit* exit) {
3078 exit->SetLocations(nullptr);
3079 }
3080
VisitExit(HExit * exit ATTRIBUTE_UNUSED)3081 void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
3082 }
3083
VisitFloatConstant(HFloatConstant * constant)3084 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
3085 LocationSummary* locations =
3086 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3087 locations->SetOut(Location::ConstantLocation(constant));
3088 }
3089
VisitFloatConstant(HFloatConstant * constant ATTRIBUTE_UNUSED)3090 void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
3091 // Will be generated at use site.
3092 }
3093
HandleGoto(HInstruction * got,HBasicBlock * successor)3094 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
3095 if (successor->IsExitBlock()) {
3096 DCHECK(got->GetPrevious()->AlwaysThrows());
3097 return; // no code needed
3098 }
3099
3100 HBasicBlock* block = got->GetBlock();
3101 HInstruction* previous = got->GetPrevious();
3102 HLoopInformation* info = block->GetLoopInformation();
3103
3104 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
3105 if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
3106 UseScratchRegisterScope temps(GetVIXLAssembler());
3107 Register temp1 = temps.AcquireX();
3108 Register temp2 = temps.AcquireX();
3109 __ Ldr(temp1, MemOperand(sp, 0));
3110 __ Ldrh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
3111 __ Add(temp2, temp2, 1);
3112 __ Strh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
3113 }
3114 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
3115 return;
3116 }
3117 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
3118 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
3119 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
3120 }
3121 if (!codegen_->GoesToNextBlock(block, successor)) {
3122 __ B(codegen_->GetLabelOf(successor));
3123 }
3124 }
3125
VisitGoto(HGoto * got)3126 void LocationsBuilderARM64::VisitGoto(HGoto* got) {
3127 got->SetLocations(nullptr);
3128 }
3129
VisitGoto(HGoto * got)3130 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
3131 HandleGoto(got, got->GetSuccessor());
3132 }
3133
VisitTryBoundary(HTryBoundary * try_boundary)3134 void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3135 try_boundary->SetLocations(nullptr);
3136 }
3137
VisitTryBoundary(HTryBoundary * try_boundary)3138 void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3139 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
3140 if (!successor->IsExitBlock()) {
3141 HandleGoto(try_boundary, successor);
3142 }
3143 }
3144
GenerateTestAndBranch(HInstruction * instruction,size_t condition_input_index,vixl::aarch64::Label * true_target,vixl::aarch64::Label * false_target)3145 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
3146 size_t condition_input_index,
3147 vixl::aarch64::Label* true_target,
3148 vixl::aarch64::Label* false_target) {
3149 HInstruction* cond = instruction->InputAt(condition_input_index);
3150
3151 if (true_target == nullptr && false_target == nullptr) {
3152 // Nothing to do. The code always falls through.
3153 return;
3154 } else if (cond->IsIntConstant()) {
3155 // Constant condition, statically compared against "true" (integer value 1).
3156 if (cond->AsIntConstant()->IsTrue()) {
3157 if (true_target != nullptr) {
3158 __ B(true_target);
3159 }
3160 } else {
3161 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
3162 if (false_target != nullptr) {
3163 __ B(false_target);
3164 }
3165 }
3166 return;
3167 }
3168
3169 // The following code generates these patterns:
3170 // (1) true_target == nullptr && false_target != nullptr
3171 // - opposite condition true => branch to false_target
3172 // (2) true_target != nullptr && false_target == nullptr
3173 // - condition true => branch to true_target
3174 // (3) true_target != nullptr && false_target != nullptr
3175 // - condition true => branch to true_target
3176 // - branch to false_target
3177 if (IsBooleanValueOrMaterializedCondition(cond)) {
3178 // The condition instruction has been materialized, compare the output to 0.
3179 Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
3180 DCHECK(cond_val.IsRegister());
3181 if (true_target == nullptr) {
3182 __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
3183 } else {
3184 __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
3185 }
3186 } else {
3187 // The condition instruction has not been materialized, use its inputs as
3188 // the comparison and its condition as the branch condition.
3189 HCondition* condition = cond->AsCondition();
3190
3191 DataType::Type type = condition->InputAt(0)->GetType();
3192 if (DataType::IsFloatingPointType(type)) {
3193 GenerateFcmp(condition);
3194 if (true_target == nullptr) {
3195 IfCondition opposite_condition = condition->GetOppositeCondition();
3196 __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
3197 } else {
3198 __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
3199 }
3200 } else {
3201 // Integer cases.
3202 Register lhs = InputRegisterAt(condition, 0);
3203 Operand rhs = InputOperandAt(condition, 1);
3204
3205 Condition arm64_cond;
3206 vixl::aarch64::Label* non_fallthrough_target;
3207 if (true_target == nullptr) {
3208 arm64_cond = ARM64Condition(condition->GetOppositeCondition());
3209 non_fallthrough_target = false_target;
3210 } else {
3211 arm64_cond = ARM64Condition(condition->GetCondition());
3212 non_fallthrough_target = true_target;
3213 }
3214
3215 if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
3216 rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
3217 switch (arm64_cond) {
3218 case eq:
3219 __ Cbz(lhs, non_fallthrough_target);
3220 break;
3221 case ne:
3222 __ Cbnz(lhs, non_fallthrough_target);
3223 break;
3224 case lt:
3225 // Test the sign bit and branch accordingly.
3226 __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3227 break;
3228 case ge:
3229 // Test the sign bit and branch accordingly.
3230 __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3231 break;
3232 default:
3233 // Without the `static_cast` the compiler throws an error for
3234 // `-Werror=sign-promo`.
3235 LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
3236 }
3237 } else {
3238 __ Cmp(lhs, rhs);
3239 __ B(arm64_cond, non_fallthrough_target);
3240 }
3241 }
3242 }
3243
3244 // If neither branch falls through (case 3), the conditional branch to `true_target`
3245 // was already emitted (case 2) and we need to emit a jump to `false_target`.
3246 if (true_target != nullptr && false_target != nullptr) {
3247 __ B(false_target);
3248 }
3249 }
3250
VisitIf(HIf * if_instr)3251 void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
3252 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
3253 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3254 locations->SetInAt(0, Location::RequiresRegister());
3255 }
3256 }
3257
VisitIf(HIf * if_instr)3258 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
3259 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
3260 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
3261 vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
3262 if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
3263 true_target = nullptr;
3264 }
3265 vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
3266 if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
3267 false_target = nullptr;
3268 }
3269 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
3270 }
3271
VisitDeoptimize(HDeoptimize * deoptimize)3272 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3273 LocationSummary* locations = new (GetGraph()->GetAllocator())
3274 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
3275 InvokeRuntimeCallingConvention calling_convention;
3276 RegisterSet caller_saves = RegisterSet::Empty();
3277 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3278 locations->SetCustomSlowPathCallerSaves(caller_saves);
3279 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
3280 locations->SetInAt(0, Location::RequiresRegister());
3281 }
3282 }
3283
VisitDeoptimize(HDeoptimize * deoptimize)3284 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3285 SlowPathCodeARM64* slow_path =
3286 deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
3287 GenerateTestAndBranch(deoptimize,
3288 /* condition_input_index= */ 0,
3289 slow_path->GetEntryLabel(),
3290 /* false_target= */ nullptr);
3291 }
3292
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)3293 void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3294 LocationSummary* locations = new (GetGraph()->GetAllocator())
3295 LocationSummary(flag, LocationSummary::kNoCall);
3296 locations->SetOut(Location::RequiresRegister());
3297 }
3298
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)3299 void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3300 __ Ldr(OutputRegister(flag),
3301 MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
3302 }
3303
IsConditionOnFloatingPointValues(HInstruction * condition)3304 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
3305 return condition->IsCondition() &&
3306 DataType::IsFloatingPointType(condition->InputAt(0)->GetType());
3307 }
3308
GetConditionForSelect(HCondition * condition)3309 static inline Condition GetConditionForSelect(HCondition* condition) {
3310 IfCondition cond = condition->AsCondition()->GetCondition();
3311 return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
3312 : ARM64Condition(cond);
3313 }
3314
VisitSelect(HSelect * select)3315 void LocationsBuilderARM64::VisitSelect(HSelect* select) {
3316 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
3317 if (DataType::IsFloatingPointType(select->GetType())) {
3318 locations->SetInAt(0, Location::RequiresFpuRegister());
3319 locations->SetInAt(1, Location::RequiresFpuRegister());
3320 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3321 } else {
3322 HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
3323 HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
3324 bool is_true_value_constant = cst_true_value != nullptr;
3325 bool is_false_value_constant = cst_false_value != nullptr;
3326 // Ask VIXL whether we should synthesize constants in registers.
3327 // We give an arbitrary register to VIXL when dealing with non-constant inputs.
3328 Operand true_op = is_true_value_constant ?
3329 Operand(Int64FromConstant(cst_true_value)) : Operand(x1);
3330 Operand false_op = is_false_value_constant ?
3331 Operand(Int64FromConstant(cst_false_value)) : Operand(x2);
3332 bool true_value_in_register = false;
3333 bool false_value_in_register = false;
3334 MacroAssembler::GetCselSynthesisInformation(
3335 x0, true_op, false_op, &true_value_in_register, &false_value_in_register);
3336 true_value_in_register |= !is_true_value_constant;
3337 false_value_in_register |= !is_false_value_constant;
3338
3339 locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister()
3340 : Location::ConstantLocation(cst_true_value));
3341 locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
3342 : Location::ConstantLocation(cst_false_value));
3343 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3344 }
3345
3346 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
3347 locations->SetInAt(2, Location::RequiresRegister());
3348 }
3349 }
3350
VisitSelect(HSelect * select)3351 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
3352 HInstruction* cond = select->GetCondition();
3353 Condition csel_cond;
3354
3355 if (IsBooleanValueOrMaterializedCondition(cond)) {
3356 if (cond->IsCondition() && cond->GetNext() == select) {
3357 // Use the condition flags set by the previous instruction.
3358 csel_cond = GetConditionForSelect(cond->AsCondition());
3359 } else {
3360 __ Cmp(InputRegisterAt(select, 2), 0);
3361 csel_cond = ne;
3362 }
3363 } else if (IsConditionOnFloatingPointValues(cond)) {
3364 GenerateFcmp(cond);
3365 csel_cond = GetConditionForSelect(cond->AsCondition());
3366 } else {
3367 __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
3368 csel_cond = GetConditionForSelect(cond->AsCondition());
3369 }
3370
3371 if (DataType::IsFloatingPointType(select->GetType())) {
3372 __ Fcsel(OutputFPRegister(select),
3373 InputFPRegisterAt(select, 1),
3374 InputFPRegisterAt(select, 0),
3375 csel_cond);
3376 } else {
3377 __ Csel(OutputRegister(select),
3378 InputOperandAt(select, 1),
3379 InputOperandAt(select, 0),
3380 csel_cond);
3381 }
3382 }
3383
VisitNativeDebugInfo(HNativeDebugInfo * info)3384 void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
3385 new (GetGraph()->GetAllocator()) LocationSummary(info);
3386 }
3387
VisitNativeDebugInfo(HNativeDebugInfo *)3388 void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) {
3389 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
3390 }
3391
GenerateNop()3392 void CodeGeneratorARM64::GenerateNop() {
3393 __ Nop();
3394 }
3395
VisitInstanceFieldGet(HInstanceFieldGet * instruction)3396 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3397 HandleFieldGet(instruction, instruction->GetFieldInfo());
3398 }
3399
VisitInstanceFieldGet(HInstanceFieldGet * instruction)3400 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3401 HandleFieldGet(instruction, instruction->GetFieldInfo());
3402 }
3403
VisitInstanceFieldSet(HInstanceFieldSet * instruction)3404 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3405 HandleFieldSet(instruction);
3406 }
3407
VisitInstanceFieldSet(HInstanceFieldSet * instruction)3408 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3409 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
3410 }
3411
3412 // Temp is used for read barrier.
NumberOfInstanceOfTemps(TypeCheckKind type_check_kind)3413 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
3414 if (kEmitCompilerReadBarrier &&
3415 (kUseBakerReadBarrier ||
3416 type_check_kind == TypeCheckKind::kAbstractClassCheck ||
3417 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
3418 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
3419 return 1;
3420 }
3421 return 0;
3422 }
3423
3424 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
3425 // interface pointer, one for loading the current interface.
3426 // The other checks have one temp for loading the object's class.
NumberOfCheckCastTemps(TypeCheckKind type_check_kind)3427 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
3428 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
3429 return 3;
3430 }
3431 return 1 + NumberOfInstanceOfTemps(type_check_kind);
3432 }
3433
VisitInstanceOf(HInstanceOf * instruction)3434 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
3435 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
3436 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3437 bool baker_read_barrier_slow_path = false;
3438 switch (type_check_kind) {
3439 case TypeCheckKind::kExactCheck:
3440 case TypeCheckKind::kAbstractClassCheck:
3441 case TypeCheckKind::kClassHierarchyCheck:
3442 case TypeCheckKind::kArrayObjectCheck: {
3443 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
3444 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
3445 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
3446 break;
3447 }
3448 case TypeCheckKind::kArrayCheck:
3449 case TypeCheckKind::kUnresolvedCheck:
3450 case TypeCheckKind::kInterfaceCheck:
3451 call_kind = LocationSummary::kCallOnSlowPath;
3452 break;
3453 case TypeCheckKind::kBitstringCheck:
3454 break;
3455 }
3456
3457 LocationSummary* locations =
3458 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
3459 if (baker_read_barrier_slow_path) {
3460 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
3461 }
3462 locations->SetInAt(0, Location::RequiresRegister());
3463 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
3464 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
3465 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
3466 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
3467 } else {
3468 locations->SetInAt(1, Location::RequiresRegister());
3469 }
3470 // The "out" register is used as a temporary, so it overlaps with the inputs.
3471 // Note that TypeCheckSlowPathARM64 uses this register too.
3472 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3473 // Add temps if necessary for read barriers.
3474 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
3475 }
3476
VisitInstanceOf(HInstanceOf * instruction)3477 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
3478 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3479 LocationSummary* locations = instruction->GetLocations();
3480 Location obj_loc = locations->InAt(0);
3481 Register obj = InputRegisterAt(instruction, 0);
3482 Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
3483 ? Register()
3484 : InputRegisterAt(instruction, 1);
3485 Location out_loc = locations->Out();
3486 Register out = OutputRegister(instruction);
3487 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
3488 DCHECK_LE(num_temps, 1u);
3489 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
3490 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3491 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
3492 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
3493 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
3494
3495 vixl::aarch64::Label done, zero;
3496 SlowPathCodeARM64* slow_path = nullptr;
3497
3498 // Return 0 if `obj` is null.
3499 // Avoid null check if we know `obj` is not null.
3500 if (instruction->MustDoNullCheck()) {
3501 __ Cbz(obj, &zero);
3502 }
3503
3504 switch (type_check_kind) {
3505 case TypeCheckKind::kExactCheck: {
3506 ReadBarrierOption read_barrier_option =
3507 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3508 // /* HeapReference<Class> */ out = obj->klass_
3509 GenerateReferenceLoadTwoRegisters(instruction,
3510 out_loc,
3511 obj_loc,
3512 class_offset,
3513 maybe_temp_loc,
3514 read_barrier_option);
3515 __ Cmp(out, cls);
3516 __ Cset(out, eq);
3517 if (zero.IsLinked()) {
3518 __ B(&done);
3519 }
3520 break;
3521 }
3522
3523 case TypeCheckKind::kAbstractClassCheck: {
3524 ReadBarrierOption read_barrier_option =
3525 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3526 // /* HeapReference<Class> */ out = obj->klass_
3527 GenerateReferenceLoadTwoRegisters(instruction,
3528 out_loc,
3529 obj_loc,
3530 class_offset,
3531 maybe_temp_loc,
3532 read_barrier_option);
3533 // If the class is abstract, we eagerly fetch the super class of the
3534 // object to avoid doing a comparison we know will fail.
3535 vixl::aarch64::Label loop, success;
3536 __ Bind(&loop);
3537 // /* HeapReference<Class> */ out = out->super_class_
3538 GenerateReferenceLoadOneRegister(instruction,
3539 out_loc,
3540 super_offset,
3541 maybe_temp_loc,
3542 read_barrier_option);
3543 // If `out` is null, we use it for the result, and jump to `done`.
3544 __ Cbz(out, &done);
3545 __ Cmp(out, cls);
3546 __ B(ne, &loop);
3547 __ Mov(out, 1);
3548 if (zero.IsLinked()) {
3549 __ B(&done);
3550 }
3551 break;
3552 }
3553
3554 case TypeCheckKind::kClassHierarchyCheck: {
3555 ReadBarrierOption read_barrier_option =
3556 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3557 // /* HeapReference<Class> */ out = obj->klass_
3558 GenerateReferenceLoadTwoRegisters(instruction,
3559 out_loc,
3560 obj_loc,
3561 class_offset,
3562 maybe_temp_loc,
3563 read_barrier_option);
3564 // Walk over the class hierarchy to find a match.
3565 vixl::aarch64::Label loop, success;
3566 __ Bind(&loop);
3567 __ Cmp(out, cls);
3568 __ B(eq, &success);
3569 // /* HeapReference<Class> */ out = out->super_class_
3570 GenerateReferenceLoadOneRegister(instruction,
3571 out_loc,
3572 super_offset,
3573 maybe_temp_loc,
3574 read_barrier_option);
3575 __ Cbnz(out, &loop);
3576 // If `out` is null, we use it for the result, and jump to `done`.
3577 __ B(&done);
3578 __ Bind(&success);
3579 __ Mov(out, 1);
3580 if (zero.IsLinked()) {
3581 __ B(&done);
3582 }
3583 break;
3584 }
3585
3586 case TypeCheckKind::kArrayObjectCheck: {
3587 ReadBarrierOption read_barrier_option =
3588 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3589 // /* HeapReference<Class> */ out = obj->klass_
3590 GenerateReferenceLoadTwoRegisters(instruction,
3591 out_loc,
3592 obj_loc,
3593 class_offset,
3594 maybe_temp_loc,
3595 read_barrier_option);
3596 // Do an exact check.
3597 vixl::aarch64::Label exact_check;
3598 __ Cmp(out, cls);
3599 __ B(eq, &exact_check);
3600 // Otherwise, we need to check that the object's class is a non-primitive array.
3601 // /* HeapReference<Class> */ out = out->component_type_
3602 GenerateReferenceLoadOneRegister(instruction,
3603 out_loc,
3604 component_offset,
3605 maybe_temp_loc,
3606 read_barrier_option);
3607 // If `out` is null, we use it for the result, and jump to `done`.
3608 __ Cbz(out, &done);
3609 __ Ldrh(out, HeapOperand(out, primitive_offset));
3610 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
3611 __ Cbnz(out, &zero);
3612 __ Bind(&exact_check);
3613 __ Mov(out, 1);
3614 __ B(&done);
3615 break;
3616 }
3617
3618 case TypeCheckKind::kArrayCheck: {
3619 // No read barrier since the slow path will retry upon failure.
3620 // /* HeapReference<Class> */ out = obj->klass_
3621 GenerateReferenceLoadTwoRegisters(instruction,
3622 out_loc,
3623 obj_loc,
3624 class_offset,
3625 maybe_temp_loc,
3626 kWithoutReadBarrier);
3627 __ Cmp(out, cls);
3628 DCHECK(locations->OnlyCallsOnSlowPath());
3629 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
3630 instruction, /* is_fatal= */ false);
3631 codegen_->AddSlowPath(slow_path);
3632 __ B(ne, slow_path->GetEntryLabel());
3633 __ Mov(out, 1);
3634 if (zero.IsLinked()) {
3635 __ B(&done);
3636 }
3637 break;
3638 }
3639
3640 case TypeCheckKind::kUnresolvedCheck:
3641 case TypeCheckKind::kInterfaceCheck: {
3642 // Note that we indeed only call on slow path, but we always go
3643 // into the slow path for the unresolved and interface check
3644 // cases.
3645 //
3646 // We cannot directly call the InstanceofNonTrivial runtime
3647 // entry point without resorting to a type checking slow path
3648 // here (i.e. by calling InvokeRuntime directly), as it would
3649 // require to assign fixed registers for the inputs of this
3650 // HInstanceOf instruction (following the runtime calling
3651 // convention), which might be cluttered by the potential first
3652 // read barrier emission at the beginning of this method.
3653 //
3654 // TODO: Introduce a new runtime entry point taking the object
3655 // to test (instead of its class) as argument, and let it deal
3656 // with the read barrier issues. This will let us refactor this
3657 // case of the `switch` code as it was previously (with a direct
3658 // call to the runtime not using a type checking slow path).
3659 // This should also be beneficial for the other cases above.
3660 DCHECK(locations->OnlyCallsOnSlowPath());
3661 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
3662 instruction, /* is_fatal= */ false);
3663 codegen_->AddSlowPath(slow_path);
3664 __ B(slow_path->GetEntryLabel());
3665 if (zero.IsLinked()) {
3666 __ B(&done);
3667 }
3668 break;
3669 }
3670
3671 case TypeCheckKind::kBitstringCheck: {
3672 // /* HeapReference<Class> */ temp = obj->klass_
3673 GenerateReferenceLoadTwoRegisters(instruction,
3674 out_loc,
3675 obj_loc,
3676 class_offset,
3677 maybe_temp_loc,
3678 kWithoutReadBarrier);
3679
3680 GenerateBitstringTypeCheckCompare(instruction, out);
3681 __ Cset(out, eq);
3682 if (zero.IsLinked()) {
3683 __ B(&done);
3684 }
3685 break;
3686 }
3687 }
3688
3689 if (zero.IsLinked()) {
3690 __ Bind(&zero);
3691 __ Mov(out, 0);
3692 }
3693
3694 if (done.IsLinked()) {
3695 __ Bind(&done);
3696 }
3697
3698 if (slow_path != nullptr) {
3699 __ Bind(slow_path->GetExitLabel());
3700 }
3701 }
3702
VisitCheckCast(HCheckCast * instruction)3703 void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
3704 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3705 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
3706 LocationSummary* locations =
3707 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
3708 locations->SetInAt(0, Location::RequiresRegister());
3709 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
3710 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
3711 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
3712 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
3713 } else {
3714 locations->SetInAt(1, Location::RequiresRegister());
3715 }
3716 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
3717 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
3718 }
3719
// Emits the ARM64 code for an HCheckCast.
//
// A TypeCheckSlowPathARM64 is always created and registered. Each type check
// kind then emits a fast path that either falls through / branches to `done`
// on success or branches to the slow path entry. When the instruction requires
// a null check, a null object skips the whole check via `done`.
//
// Between one and three temporaries are used (see the DCHECKs below); the
// second and third are only fetched from the locations when they exist.
void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
  LocationSummary* locations = instruction->GetLocations();
  Location obj_loc = locations->InAt(0);
  Register obj = InputRegisterAt(instruction, 0);
  // The bitstring check has no class register input (its inputs 1-3 are constants),
  // so `cls` is left as a default-constructed register in that case.
  Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
      ? Register()
      : InputRegisterAt(instruction, 1);
  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
  DCHECK_GE(num_temps, 1u);
  DCHECK_LE(num_temps, 3u);
  Location temp_loc = locations->GetTemp(0);
  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
  Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
  Register temp = WRegisterFrom(temp_loc);
  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
  const uint32_t object_array_data_offset =
      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();

  bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
  SlowPathCodeARM64* type_check_slow_path =
      new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
          instruction, is_type_check_slow_path_fatal);
  codegen_->AddSlowPath(type_check_slow_path);

  vixl::aarch64::Label done;
  // Avoid null check if we know obj is not null.
  if (instruction->MustDoNullCheck()) {
    __ Cbz(obj, &done);
  }

  switch (type_check_kind) {
    case TypeCheckKind::kExactCheck:
    case TypeCheckKind::kArrayCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);

      __ Cmp(temp, cls);
      // Jump to slow path for throwing the exception or doing a
      // more involved array check.
      __ B(ne, type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kAbstractClassCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);

      // If the class is abstract, we eagerly fetch the super class of the
      // object to avoid doing a comparison we know will fail.
      vixl::aarch64::Label loop;
      __ Bind(&loop);
      // /* HeapReference<Class> */ temp = temp->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       super_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the class reference currently in `temp` is null, jump to the slow path to throw the
      // exception.
      __ Cbz(temp, type_check_slow_path->GetEntryLabel());
      // Otherwise, compare classes.
      __ Cmp(temp, cls);
      __ B(ne, &loop);
      break;
    }

    case TypeCheckKind::kClassHierarchyCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);

      // Walk over the class hierarchy to find a match.
      vixl::aarch64::Label loop;
      __ Bind(&loop);
      __ Cmp(temp, cls);
      __ B(eq, &done);

      // /* HeapReference<Class> */ temp = temp->super_class_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       super_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the class reference currently in `temp` is not null, jump
      // back at the beginning of the loop.
      __ Cbnz(temp, &loop);
      // Otherwise, jump to the slow path to throw the exception.
      __ B(type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kArrayObjectCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);

      // Do an exact check.
      __ Cmp(temp, cls);
      __ B(eq, &done);

      // Otherwise, we need to check that the object's class is a non-primitive array.
      // /* HeapReference<Class> */ temp = temp->component_type_
      GenerateReferenceLoadOneRegister(instruction,
                                       temp_loc,
                                       component_offset,
                                       maybe_temp2_loc,
                                       kWithoutReadBarrier);

      // If the component type is null, jump to the slow path to throw the exception.
      __ Cbz(temp, type_check_slow_path->GetEntryLabel());
      // Otherwise, the object is indeed an array. Further check that this component type is not a
      // primitive type.
      __ Ldrh(temp, HeapOperand(temp, primitive_offset));
      // Any non-zero primitive type value means "primitive", hence the Cbnz below.
      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
      __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
      break;
    }

    case TypeCheckKind::kUnresolvedCheck:
      // We always go into the type check slow path for the unresolved check cases.
      //
      // We cannot directly call the CheckCast runtime entry point
      // without resorting to a type checking slow path here (i.e. by
      // calling InvokeRuntime directly), as it would require to
      // assign fixed registers for the inputs of this HCheckCast
      // instruction (following the runtime calling convention), which
      // might be cluttered by the potential first read barrier
      // emission at the beginning of this method.
      __ B(type_check_slow_path->GetEntryLabel());
      break;
    case TypeCheckKind::kInterfaceCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);

      // /* HeapReference<Class> */ temp = temp->iftable_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        temp_loc,
                                        iftable_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);
      // Iftable is never null.
      // temp2 = iftable length, used as the remaining-element counter of the loop below.
      __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
      // Loop through the iftable and check if any class matches.
      vixl::aarch64::Label start_loop;
      __ Bind(&start_loop);
      // Counter exhausted without a match: go to the slow path.
      __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel());
      __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset));
      GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc));
      // Go to next interface.
      // The cursor advances by two references and the counter drops by 2 per iteration
      // (presumably each iftable entry spans two slots -- TODO confirm against mirror::IfTable).
      __ Add(temp, temp, 2 * kHeapReferenceSize);
      __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2);
      // Compare the classes and continue the loop if they do not match.
      __ Cmp(cls, WRegisterFrom(maybe_temp3_loc));
      __ B(ne, &start_loop);
      break;
    }

    case TypeCheckKind::kBitstringCheck: {
      // /* HeapReference<Class> */ temp = obj->klass_
      GenerateReferenceLoadTwoRegisters(instruction,
                                        temp_loc,
                                        obj_loc,
                                        class_offset,
                                        maybe_temp2_loc,
                                        kWithoutReadBarrier);

      // The compare helper leaves its result in the condition flags; `ne` means mismatch.
      GenerateBitstringTypeCheckCompare(instruction, temp);
      __ B(ne, type_check_slow_path->GetEntryLabel());
      break;
    }
  }
  __ Bind(&done);

  // The slow path resumes here, at the same position as `done`.
  __ Bind(type_check_slow_path->GetExitLabel());
}
3927
VisitIntConstant(HIntConstant * constant)3928 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
3929 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
3930 locations->SetOut(Location::ConstantLocation(constant));
3931 }
3932
// Intentionally emits no code for an int constant: the locations builder gives
// the constant a ConstantLocation, so users materialize it where needed.
void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
  // Will be generated at use site.
}
3936
VisitNullConstant(HNullConstant * constant)3937 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
3938 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
3939 locations->SetOut(Location::ConstantLocation(constant));
3940 }
3941
// Intentionally emits no code for a null constant: the locations builder gives
// the constant a ConstantLocation, so users materialize it where needed.
void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
  // Will be generated at use site.
}
3945
// Sets up locations for an unresolved invoke by delegating to the common
// HandleInvoke() path.
void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
  // The trampoline uses the same calling convention as dex calling conventions,
  // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
  // the method_idx.
  HandleInvoke(invoke);
}
3952
// Emits the runtime call for an unresolved invoke, followed by the optional
// marking register check.
void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
  codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
  // The source line number is passed as the `code` argument of the check.
  codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
3957
HandleInvoke(HInvoke * invoke)3958 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
3959 InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
3960 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3961 }
3962
// Interface invokes use the common invoke location setup.
void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
  HandleInvoke(invoke);
}
3966
// Emits an interface call through the receiver class's IMT.
//
// Sequence emitted:
//   1. ip1 <- dex method index (hidden argument for the IMT conflict trampoline).
//   2. temp <- receiver->klass_ (also serves as the implicit null check).
//   3. temp <- klass->imt_ ; temp <- imt[GetImtIndex()] ; lr <- temp->entry_point.
//   4. blr lr, with the pc info recorded right after the branch.
void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
  // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
  LocationSummary* locations = invoke->GetLocations();
  Register temp = XRegisterFrom(locations->GetTemp(0));
  Location receiver = locations->InAt(0);
  Offset class_offset = mirror::Object::ClassOffset();
  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);

  // The register ip1 is required to be used for the hidden argument in
  // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
  // The exclusion lasts until `scratch_scope` is destroyed at the end of this function.
  MacroAssembler* masm = GetVIXLAssembler();
  UseScratchRegisterScope scratch_scope(masm);
  scratch_scope.Exclude(ip1);
  __ Mov(ip1, invoke->GetDexMethodIndex());

  // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
  if (receiver.IsStackSlot()) {
    // Receiver was spilled: reload it first, outside the guarded scope, since only the
    // class load below is the instruction the null check is recorded for.
    __ Ldr(temp.W(), StackOperandFrom(receiver));
    {
      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
      // /* HeapReference<Class> */ temp = temp->klass_
      __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
      codegen_->MaybeRecordImplicitNullCheck(invoke);
    }
  } else {
    EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
    // /* HeapReference<Class> */ temp = receiver->klass_
    __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
    codegen_->MaybeRecordImplicitNullCheck(invoke);
  }

  // Instead of simply (possibly) unpoisoning `temp` here, we should
  // emit a read barrier for the previous class reference load.
  // However this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
  // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (the
  // concurrent copying collector may not in the future).
  GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
  // temp = temp->imt_ (native pointer, hence the full X-register load).
  __ Ldr(temp,
      MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
      invoke->GetImtIndex(), kArm64PointerSize));
  // temp = temp->GetImtEntryAt(method_offset);
  __ Ldr(temp, MemOperand(temp, method_offset));
  // lr = temp->GetEntryPoint();
  __ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));

  {
    // Ensure the pc position is recorded immediately after the `blr` instruction.
    ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);

    // lr();
    __ blr(lr);
    DCHECK(!codegen_->IsLeafMethod());
    codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
  }

  // The source line number is passed as the `code` argument of the check.
  codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
4027
VisitInvokeVirtual(HInvokeVirtual * invoke)4028 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4029 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4030 if (intrinsic.TryDispatch(invoke)) {
4031 return;
4032 }
4033
4034 HandleInvoke(invoke);
4035 }
4036
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)4037 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4038 // Explicit clinit checks triggered by static invokes must have been pruned by
4039 // art::PrepareForRegisterAllocation.
4040 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4041
4042 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4043 if (intrinsic.TryDispatch(invoke)) {
4044 return;
4045 }
4046
4047 HandleInvoke(invoke);
4048 }
4049
TryGenerateIntrinsicCode(HInvoke * invoke,CodeGeneratorARM64 * codegen)4050 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4051 if (invoke->GetLocations()->Intrinsified()) {
4052 IntrinsicCodeGeneratorARM64 intrinsic(codegen);
4053 intrinsic.Dispatch(invoke);
4054 return true;
4055 }
4056 return false;
4057 }
4058
// Returns the dispatch info actually used for a static/direct invoke. The
// desired info is returned unchanged and `method` is not consulted.
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method ATTRIBUTE_UNUSED) {
  // On ARM64 we support all dispatch types.
  return desired_dispatch_info;
}
4065
// Emits a static or direct call in two phases:
//   1. Load the callee ArtMethod* according to the invoke's MethodLoadKind
//      (into `temp` for every kind except kRecursive, which reuses the special
//      input location; kRuntimeCall emits the whole call and returns early).
//   2. Branch according to the invoke's CodePtrLocation, recording pc info
//      immediately after the branch instruction.
// `slow_path` is forwarded to RecordPcInfo / the runtime call helper.
void CodeGeneratorARM64::GenerateStaticOrDirectCall(
    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
  // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
  switch (invoke->GetMethodLoadKind()) {
    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
      uint32_t offset =
          GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
      // temp = thread->string_init_entrypoint
      __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
      // No load needed: the method is already available as the special input.
      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
      DCHECK(GetCompilerOptions().IsBootImage());
      // Add ADRP with its PC-relative method patch.
      vixl::aarch64::Label* adrp_label = NewBootImageMethodPatch(invoke->GetTargetMethod());
      EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
      // Add ADD with its PC-relative method patch.
      vixl::aarch64::Label* add_label =
          NewBootImageMethodPatch(invoke->GetTargetMethod(), adrp_label);
      EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
      // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
      uint32_t boot_image_offset = GetBootImageOffset(invoke);
      vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset);
      EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
      // Add LDR with its PC-relative .data.bimg.rel.ro patch.
      vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label);
      // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
      EmitLdrOffsetPlaceholder(ldr_label, WRegisterFrom(temp), XRegisterFrom(temp));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
      // Add ADRP with its PC-relative .bss entry patch.
      MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
      vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method);
      EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
      // Add LDR with its PC-relative .bss entry patch.
      vixl::aarch64::Label* ldr_label =
          NewMethodBssEntryPatch(target_method, adrp_label);
      EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
      // Load method address from literal pool.
      __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
      return;  // No code pointer retrieval; the runtime performs the call directly.
    }
  }

  switch (invoke->GetCodePtrLocation()) {
    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
      {
        // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
        ExactAssemblyScope eas(GetVIXLAssembler(),
                               kInstructionSize,
                               CodeBufferCheckScope::kExactSize);
        // Branch-and-link to this method's own frame entry label.
        __ bl(&frame_entry_label_);
        RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      }
      break;
    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
      // LR = callee_method->entry_point_from_quick_compiled_code_;
      __ Ldr(lr, MemOperand(
          XRegisterFrom(callee_method),
          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
      {
        // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
        ExactAssemblyScope eas(GetVIXLAssembler(),
                               kInstructionSize,
                               CodeBufferCheckScope::kExactSize);
        // lr()
        __ blr(lr);
        RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      }
      break;
  }

  // A method containing a call cannot be a leaf method.
  DCHECK(!IsLeafMethod());
}
4154
// Emits a virtual call through the receiver class's embedded vtable:
//   temp <- receiver->klass_ (also serves as the implicit null check),
//   temp <- klass->embedded_vtable[GetVTableIndex()],
//   lr   <- temp->entry_point, then `blr lr` with pc info recorded right after.
// `temp_in` provides the scratch register; `slow_path` is forwarded to RecordPcInfo.
void CodeGeneratorARM64::GenerateVirtualCall(
    HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
  // Use the calling convention instead of the location of the receiver, as
  // intrinsics may have put the receiver in a different register. In the intrinsics
  // slow path, the arguments have been moved to the right place, so here we are
  // guaranteed that the receiver is the first register of the calling convention.
  InvokeDexCallingConvention calling_convention;
  Register receiver = calling_convention.GetRegisterAt(0);
  Register temp = XRegisterFrom(temp_in);
  size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
      invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
  Offset class_offset = mirror::Object::ClassOffset();
  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);

  DCHECK(receiver.IsRegister());

  {
    // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
    EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
    // /* HeapReference<Class> */ temp = receiver->klass_
    __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
    MaybeRecordImplicitNullCheck(invoke);
  }
  // Instead of simply (possibly) unpoisoning `temp` here, we should
  // emit a read barrier for the previous class reference load.
  // However this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
  // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (the
  // concurrent copying collector may not in the future).
  GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
  // temp = temp->GetMethodAt(method_offset);
  __ Ldr(temp, MemOperand(temp, method_offset));
  // lr = temp->GetEntryPoint();
  __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
  {
    // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
    ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
    // lr();
    __ blr(lr);
    RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
  }
}
4197
// Polymorphic invokes use the common invoke location setup.
void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
  HandleInvoke(invoke);
}
4201
// Emits the polymorphic invoke call, followed by the optional marking register
// check.
void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
  codegen_->GenerateInvokePolymorphicCall(invoke);
  // The source line number is passed as the `code` argument of the check.
  codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
4206
// Custom invokes use the common invoke location setup.
void LocationsBuilderARM64::VisitInvokeCustom(HInvokeCustom* invoke) {
  HandleInvoke(invoke);
}
4210
// Emits the custom invoke call, followed by the optional marking register check.
void InstructionCodeGeneratorARM64::VisitInvokeCustom(HInvokeCustom* invoke) {
  codegen_->GenerateInvokeCustomCall(invoke);
  // The source line number is passed as the `code` argument of the check.
  codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
4215
NewBootImageIntrinsicPatch(uint32_t intrinsic_data,vixl::aarch64::Label * adrp_label)4216 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageIntrinsicPatch(
4217 uint32_t intrinsic_data,
4218 vixl::aarch64::Label* adrp_label) {
4219 return NewPcRelativePatch(
4220 /* dex_file= */ nullptr, intrinsic_data, adrp_label, &boot_image_intrinsic_patches_);
4221 }
4222
NewBootImageRelRoPatch(uint32_t boot_image_offset,vixl::aarch64::Label * adrp_label)4223 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch(
4224 uint32_t boot_image_offset,
4225 vixl::aarch64::Label* adrp_label) {
4226 return NewPcRelativePatch(
4227 /* dex_file= */ nullptr, boot_image_offset, adrp_label, &boot_image_method_patches_);
4228 }
4229
NewBootImageMethodPatch(MethodReference target_method,vixl::aarch64::Label * adrp_label)4230 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch(
4231 MethodReference target_method,
4232 vixl::aarch64::Label* adrp_label) {
4233 return NewPcRelativePatch(
4234 target_method.dex_file, target_method.index, adrp_label, &boot_image_method_patches_);
4235 }
4236
NewMethodBssEntryPatch(MethodReference target_method,vixl::aarch64::Label * adrp_label)4237 vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch(
4238 MethodReference target_method,
4239 vixl::aarch64::Label* adrp_label) {
4240 return NewPcRelativePatch(
4241 target_method.dex_file, target_method.index, adrp_label, &method_bss_entry_patches_);
4242 }
4243
NewBootImageTypePatch(const DexFile & dex_file,dex::TypeIndex type_index,vixl::aarch64::Label * adrp_label)4244 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageTypePatch(
4245 const DexFile& dex_file,
4246 dex::TypeIndex type_index,
4247 vixl::aarch64::Label* adrp_label) {
4248 return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &boot_image_type_patches_);
4249 }
4250
NewBssEntryTypePatch(const DexFile & dex_file,dex::TypeIndex type_index,vixl::aarch64::Label * adrp_label)4251 vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
4252 const DexFile& dex_file,
4253 dex::TypeIndex type_index,
4254 vixl::aarch64::Label* adrp_label) {
4255 return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_);
4256 }
4257
NewBootImageStringPatch(const DexFile & dex_file,dex::StringIndex string_index,vixl::aarch64::Label * adrp_label)4258 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageStringPatch(
4259 const DexFile& dex_file,
4260 dex::StringIndex string_index,
4261 vixl::aarch64::Label* adrp_label) {
4262 return NewPcRelativePatch(
4263 &dex_file, string_index.index_, adrp_label, &boot_image_string_patches_);
4264 }
4265
NewStringBssEntryPatch(const DexFile & dex_file,dex::StringIndex string_index,vixl::aarch64::Label * adrp_label)4266 vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch(
4267 const DexFile& dex_file,
4268 dex::StringIndex string_index,
4269 vixl::aarch64::Label* adrp_label) {
4270 return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
4271 }
4272
EmitBakerReadBarrierCbnz(uint32_t custom_data)4273 void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) {
4274 DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope.
4275 if (Runtime::Current()->UseJitCompilation()) {
4276 auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
4277 vixl::aarch64::Label* slow_path_entry = &it->second.label;
4278 __ cbnz(mr, slow_path_entry);
4279 } else {
4280 baker_read_barrier_patches_.emplace_back(custom_data);
4281 vixl::aarch64::Label* cbnz_label = &baker_read_barrier_patches_.back().label;
4282 __ bind(cbnz_label);
4283 __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
4284 }
4285 }
4286
NewPcRelativePatch(const DexFile * dex_file,uint32_t offset_or_index,vixl::aarch64::Label * adrp_label,ArenaDeque<PcRelativePatchInfo> * patches)4287 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
4288 const DexFile* dex_file,
4289 uint32_t offset_or_index,
4290 vixl::aarch64::Label* adrp_label,
4291 ArenaDeque<PcRelativePatchInfo>* patches) {
4292 // Add a patch entry and return the label.
4293 patches->emplace_back(dex_file, offset_or_index);
4294 PcRelativePatchInfo* info = &patches->back();
4295 vixl::aarch64::Label* label = &info->label;
4296 // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
4297 info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
4298 return label;
4299 }
4300
DeduplicateBootImageAddressLiteral(uint64_t address)4301 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
4302 uint64_t address) {
4303 return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address));
4304 }
4305
DeduplicateJitStringLiteral(const DexFile & dex_file,dex::StringIndex string_index,Handle<mirror::String> handle)4306 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
4307 const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) {
4308 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
4309 return jit_string_patches_.GetOrCreate(
4310 StringReference(&dex_file, string_index),
4311 [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); });
4312 }
4313
DeduplicateJitClassLiteral(const DexFile & dex_file,dex::TypeIndex type_index,Handle<mirror::Class> handle)4314 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral(
4315 const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) {
4316 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
4317 return jit_class_patches_.GetOrCreate(
4318 TypeReference(&dex_file, type_index),
4319 [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); });
4320 }
4321
// Binds `fixup_label` and emits an ADRP into `reg` with a zero offset
// placeholder. `reg` must be an X register.
void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
                                             vixl::aarch64::Register reg) {
  DCHECK(reg.IsX());
  // Guard the emission so the label is bound exactly at the ADRP instruction.
  SingleEmissionCheckScope guard(GetVIXLAssembler());
  __ Bind(fixup_label);
  __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0));
}
4329
// Binds `fixup_label` and emits `add out, base, #0` where the immediate is a
// placeholder. Both `out` and `base` must be X registers.
void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
                                            vixl::aarch64::Register out,
                                            vixl::aarch64::Register base) {
  DCHECK(out.IsX());
  DCHECK(base.IsX());
  // Guard the emission so the label is bound exactly at the ADD instruction.
  SingleEmissionCheckScope guard(GetVIXLAssembler());
  __ Bind(fixup_label);
  __ add(out, base, Operand(/* offset placeholder */ 0));
}
4339
// Binds `fixup_label` and emits `ldr out, [base, #0]` where the offset is a
// placeholder. `base` must be an X register; `out` is not constrained here, and
// callers in this file pass either a W or an X register.
void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
                                                  vixl::aarch64::Register out,
                                                  vixl::aarch64::Register base) {
  DCHECK(base.IsX());
  // Guard the emission so the label is bound exactly at the LDR instruction.
  SingleEmissionCheckScope guard(GetVIXLAssembler());
  __ Bind(fixup_label);
  __ ldr(out, MemOperand(base, /* offset placeholder */ 0));
}
4348
LoadBootImageAddress(vixl::aarch64::Register reg,uint32_t boot_image_reference)4349 void CodeGeneratorARM64::LoadBootImageAddress(vixl::aarch64::Register reg,
4350 uint32_t boot_image_reference) {
4351 if (GetCompilerOptions().IsBootImage()) {
4352 // Add ADRP with its PC-relative type patch.
4353 vixl::aarch64::Label* adrp_label = NewBootImageIntrinsicPatch(boot_image_reference);
4354 EmitAdrpPlaceholder(adrp_label, reg.X());
4355 // Add ADD with its PC-relative type patch.
4356 vixl::aarch64::Label* add_label = NewBootImageIntrinsicPatch(boot_image_reference, adrp_label);
4357 EmitAddPlaceholder(add_label, reg.X(), reg.X());
4358 } else if (GetCompilerOptions().GetCompilePic()) {
4359 // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
4360 vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_reference);
4361 EmitAdrpPlaceholder(adrp_label, reg.X());
4362 // Add LDR with its PC-relative .data.bimg.rel.ro patch.
4363 vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_reference, adrp_label);
4364 EmitLdrOffsetPlaceholder(ldr_label, reg.W(), reg.X());
4365 } else {
4366 DCHECK(Runtime::Current()->UseJitCompilation());
4367 gc::Heap* heap = Runtime::Current()->GetHeap();
4368 DCHECK(!heap->GetBootImageSpaces().empty());
4369 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
4370 __ Ldr(reg.W(), DeduplicateBootImageAddressLiteral(reinterpret_cast<uintptr_t>(address)));
4371 }
4372 }
4373
// Allocates an instance on behalf of an intrinsic for a static invoke.
// The class is loaded into the first runtime calling convention register:
// via a boot image type patch pair (ADRP + ADD) when compiling the boot image,
// or via LoadBootImageAddress(boot_image_offset) otherwise. The
// kQuickAllocObjectInitialized entrypoint is then invoked.
void CodeGeneratorARM64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
                                                      uint32_t boot_image_offset) {
  DCHECK(invoke->IsStatic());
  InvokeRuntimeCallingConvention calling_convention;
  Register argument = calling_convention.GetRegisterAt(0);
  if (GetCompilerOptions().IsBootImage()) {
    // In this mode the caller is expected to pass the invalid-reference marker.
    DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
    // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
    MethodReference target_method = invoke->GetTargetMethod();
    // The class to allocate is the declaring class of the target method.
    dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
    // Add ADRP with its PC-relative type patch.
    vixl::aarch64::Label* adrp_label = NewBootImageTypePatch(*target_method.dex_file, type_idx);
    EmitAdrpPlaceholder(adrp_label, argument.X());
    // Add ADD with its PC-relative type patch.
    vixl::aarch64::Label* add_label =
        NewBootImageTypePatch(*target_method.dex_file, type_idx, adrp_label);
    EmitAddPlaceholder(add_label, argument.X(), argument.X());
  } else {
    LoadBootImageAddress(argument, boot_image_offset);
  }
  InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
  // NOTE(review): the type check below names kQuickAllocObjectWithChecks while the call above
  // uses kQuickAllocObjectInitialized -- presumably both share the same signature; confirm.
  CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
}
4397
4398 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo> & infos,ArenaVector<linker::LinkerPatch> * linker_patches)4399 inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
4400 const ArenaDeque<PcRelativePatchInfo>& infos,
4401 ArenaVector<linker::LinkerPatch>* linker_patches) {
4402 for (const PcRelativePatchInfo& info : infos) {
4403 linker_patches->push_back(Factory(info.label.GetLocation(),
4404 info.target_dex_file,
4405 info.pc_insn_label->GetLocation(),
4406 info.offset_or_index));
4407 }
4408 }
4409
4410 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
NoDexFileAdapter(size_t literal_offset,const DexFile * target_dex_file,uint32_t pc_insn_offset,uint32_t boot_image_offset)4411 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
4412 const DexFile* target_dex_file,
4413 uint32_t pc_insn_offset,
4414 uint32_t boot_image_offset) {
4415 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
4416 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
4417 }
4418
EmitLinkerPatches(ArenaVector<linker::LinkerPatch> * linker_patches)4419 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
4420 DCHECK(linker_patches->empty());
4421 size_t size =
4422 boot_image_method_patches_.size() +
4423 method_bss_entry_patches_.size() +
4424 boot_image_type_patches_.size() +
4425 type_bss_entry_patches_.size() +
4426 boot_image_string_patches_.size() +
4427 string_bss_entry_patches_.size() +
4428 boot_image_intrinsic_patches_.size() +
4429 baker_read_barrier_patches_.size();
4430 linker_patches->reserve(size);
4431 if (GetCompilerOptions().IsBootImage()) {
4432 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
4433 boot_image_method_patches_, linker_patches);
4434 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
4435 boot_image_type_patches_, linker_patches);
4436 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
4437 boot_image_string_patches_, linker_patches);
4438 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
4439 boot_image_intrinsic_patches_, linker_patches);
4440 } else {
4441 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
4442 boot_image_method_patches_, linker_patches);
4443 DCHECK(boot_image_type_patches_.empty());
4444 DCHECK(boot_image_string_patches_.empty());
4445 DCHECK(boot_image_intrinsic_patches_.empty());
4446 }
4447 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
4448 method_bss_entry_patches_, linker_patches);
4449 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
4450 type_bss_entry_patches_, linker_patches);
4451 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
4452 string_bss_entry_patches_, linker_patches);
4453 for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
4454 linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
4455 info.label.GetLocation(), info.custom_data));
4456 }
4457 DCHECK_EQ(size, linker_patches->size());
4458 }
4459
NeedsThunkCode(const linker::LinkerPatch & patch) const4460 bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const {
4461 return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
4462 patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
4463 }
4464
EmitThunkCode(const linker::LinkerPatch & patch,ArenaVector<uint8_t> * code,std::string * debug_name)4465 void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch,
4466 /*out*/ ArenaVector<uint8_t>* code,
4467 /*out*/ std::string* debug_name) {
4468 Arm64Assembler assembler(GetGraph()->GetAllocator());
4469 switch (patch.GetType()) {
4470 case linker::LinkerPatch::Type::kCallRelative: {
4471 // The thunk just uses the entry point in the ArtMethod. This works even for calls
4472 // to the generic JNI and interpreter trampolines.
4473 Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
4474 kArm64PointerSize).Int32Value());
4475 assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
4476 if (GetCompilerOptions().GenerateAnyDebugInfo()) {
4477 *debug_name = "MethodCallThunk";
4478 }
4479 break;
4480 }
4481 case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
4482 DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
4483 CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
4484 break;
4485 }
4486 default:
4487 LOG(FATAL) << "Unexpected patch type " << patch.GetType();
4488 UNREACHABLE();
4489 }
4490
4491 // Ensure we emit the literal pool if any.
4492 assembler.FinalizeCode();
4493 code->resize(assembler.CodeSize());
4494 MemoryRegion code_region(code->data(), code->size());
4495 assembler.FinalizeInstructions(code_region);
4496 }
4497
DeduplicateUint32Literal(uint32_t value)4498 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) {
4499 return uint32_literals_.GetOrCreate(
4500 value,
4501 [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
4502 }
4503
DeduplicateUint64Literal(uint64_t value)4504 vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
4505 return uint64_literals_.GetOrCreate(
4506 value,
4507 [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
4508 }
4509
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)4510 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4511 // Explicit clinit checks triggered by static invokes must have been pruned by
4512 // art::PrepareForRegisterAllocation.
4513 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4514
4515 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4516 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4517 return;
4518 }
4519
4520 {
4521 // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
4522 // are no pools emitted.
4523 EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
4524 LocationSummary* locations = invoke->GetLocations();
4525 codegen_->GenerateStaticOrDirectCall(
4526 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
4527 }
4528
4529 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4530 }
4531
VisitInvokeVirtual(HInvokeVirtual * invoke)4532 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4533 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4534 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4535 return;
4536 }
4537
4538 {
4539 // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
4540 // are no pools emitted.
4541 EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
4542 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
4543 DCHECK(!codegen_->IsLeafMethod());
4544 }
4545
4546 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4547 }
4548
GetSupportedLoadClassKind(HLoadClass::LoadKind desired_class_load_kind)4549 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
4550 HLoadClass::LoadKind desired_class_load_kind) {
4551 switch (desired_class_load_kind) {
4552 case HLoadClass::LoadKind::kInvalid:
4553 LOG(FATAL) << "UNREACHABLE";
4554 UNREACHABLE();
4555 case HLoadClass::LoadKind::kReferrersClass:
4556 break;
4557 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
4558 case HLoadClass::LoadKind::kBootImageRelRo:
4559 case HLoadClass::LoadKind::kBssEntry:
4560 DCHECK(!Runtime::Current()->UseJitCompilation());
4561 break;
4562 case HLoadClass::LoadKind::kJitBootImageAddress:
4563 case HLoadClass::LoadKind::kJitTableAddress:
4564 DCHECK(Runtime::Current()->UseJitCompilation());
4565 break;
4566 case HLoadClass::LoadKind::kRuntimeCall:
4567 break;
4568 }
4569 return desired_class_load_kind;
4570 }
4571
VisitLoadClass(HLoadClass * cls)4572 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
4573 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
4574 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
4575 InvokeRuntimeCallingConvention calling_convention;
4576 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
4577 cls,
4578 LocationFrom(calling_convention.GetRegisterAt(0)),
4579 LocationFrom(vixl::aarch64::x0));
4580 DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0));
4581 return;
4582 }
4583 DCHECK(!cls->NeedsAccessCheck());
4584
4585 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
4586 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
4587 ? LocationSummary::kCallOnSlowPath
4588 : LocationSummary::kNoCall;
4589 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
4590 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
4591 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
4592 }
4593
4594 if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
4595 locations->SetInAt(0, Location::RequiresRegister());
4596 }
4597 locations->SetOut(Location::RequiresRegister());
4598 if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
4599 if (!kUseReadBarrier || kUseBakerReadBarrier) {
4600 // Rely on the type resolution or initialization and marking to save everything we need.
4601 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
4602 } else {
4603 // For non-Baker read barrier we have a temp-clobbering call.
4604 }
4605 }
4606 }
4607
// Generates the code that materializes the class referenced by `cls` in its output register.
//
// kRuntimeCall loads are delegated to CodeGenerator::GenerateLoadClassRuntimeCall(). For the
// other load kinds the class reference is produced inline according to the load kind (see the
// individual cases below). When the load needs a null check (kBssEntry) and/or a class
// initialization check, a LoadClassSlowPathARM64 is attached.
//
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
// move.
void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
    codegen_->GenerateLoadClassRuntimeCall(cls);
    codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
    return;
  }
  DCHECK(!cls->NeedsAccessCheck());

  Location out_loc = cls->GetLocations()->Out();
  Register out = OutputRegister(cls);

  // No read barrier is used for classes that are in the boot image.
  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
      ? kWithoutReadBarrier
      : kCompilerReadBarrierOption;
  // Set by the load kinds whose loaded value must be checked against null (kBssEntry only).
  bool generate_null_check = false;
  switch (load_kind) {
    case HLoadClass::LoadKind::kReferrersClass: {
      DCHECK(!cls->CanCallRuntime());
      DCHECK(!cls->MustGenerateClinitCheck());
      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
      Register current_method = InputRegisterAt(cls, 0);
      codegen_->GenerateGcRootFieldLoad(cls,
                                        out_loc,
                                        current_method,
                                        ArtMethod::DeclaringClassOffset().Int32Value(),
                                        /* fixup_label= */ nullptr,
                                        read_barrier_option);
      break;
    }
    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
      // Add ADRP with its PC-relative type patch.
      const DexFile& dex_file = cls->GetDexFile();
      dex::TypeIndex type_index = cls->GetTypeIndex();
      vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index);
      codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
      // Add ADD with its PC-relative type patch.
      vixl::aarch64::Label* add_label =
          codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label);
      codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
      break;
    }
    case HLoadClass::LoadKind::kBootImageRelRo: {
      DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
      uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls);
      // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
      vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset);
      codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
      // Add LDR with its PC-relative .data.bimg.rel.ro patch.
      vixl::aarch64::Label* ldr_label =
          codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label);
      codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
      break;
    }
    case HLoadClass::LoadKind::kBssEntry: {
      // Add ADRP with its PC-relative Class .bss entry patch.
      const DexFile& dex_file = cls->GetDexFile();
      dex::TypeIndex type_index = cls->GetTypeIndex();
      // The X view of the output register holds the ADRP result used as the load base.
      vixl::aarch64::Register temp = XRegisterFrom(out_loc);
      vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
      codegen_->EmitAdrpPlaceholder(adrp_label, temp);
      // Add LDR with its PC-relative Class .bss entry patch.
      vixl::aarch64::Label* ldr_label =
          codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label);
      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
      codegen_->GenerateGcRootFieldLoad(cls,
                                        out_loc,
                                        temp,
                                        /* offset placeholder */ 0u,
                                        ldr_label,
                                        read_barrier_option);
      // A null value loaded from the .bss entry is sent to the slow path below.
      generate_null_check = true;
      break;
    }
    case HLoadClass::LoadKind::kJitBootImageAddress: {
      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
      // The class address is loaded as a 32-bit value from a deduplicated literal.
      uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
      DCHECK_NE(address, 0u);
      __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
      break;
    }
    case HLoadClass::LoadKind::kJitTableAddress: {
      // Load the literal created by DeduplicateJitClassLiteral() into `out`, then load the
      // GC root through it (offset 0).
      __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
                                                       cls->GetTypeIndex(),
                                                       cls->GetClass()));
      codegen_->GenerateGcRootFieldLoad(cls,
                                        out_loc,
                                        out.X(),
                                        /* offset= */ 0,
                                        /* fixup_label= */ nullptr,
                                        read_barrier_option);
      break;
    }
    case HLoadClass::LoadKind::kRuntimeCall:
    case HLoadClass::LoadKind::kInvalid:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }

  bool do_clinit = cls->MustGenerateClinitCheck();
  if (generate_null_check || do_clinit) {
    DCHECK(cls->CanCallRuntime());
    SlowPathCodeARM64* slow_path =
        new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(cls, cls);
    codegen_->AddSlowPath(slow_path);
    if (generate_null_check) {
      __ Cbz(out, slow_path->GetEntryLabel());
    }
    if (cls->MustGenerateClinitCheck()) {
      GenerateClassInitializationCheck(slow_path, out);
    } else {
      // Without an initialization check, the slow path's exit label is bound here.
      __ Bind(slow_path->GetExitLabel());
    }
    codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
  }
}
4727
VisitLoadMethodHandle(HLoadMethodHandle * load)4728 void LocationsBuilderARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
4729 InvokeRuntimeCallingConvention calling_convention;
4730 Location location = LocationFrom(calling_convention.GetRegisterAt(0));
4731 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
4732 }
4733
VisitLoadMethodHandle(HLoadMethodHandle * load)4734 void InstructionCodeGeneratorARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
4735 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
4736 }
4737
VisitLoadMethodType(HLoadMethodType * load)4738 void LocationsBuilderARM64::VisitLoadMethodType(HLoadMethodType* load) {
4739 InvokeRuntimeCallingConvention calling_convention;
4740 Location location = LocationFrom(calling_convention.GetRegisterAt(0));
4741 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
4742 }
4743
VisitLoadMethodType(HLoadMethodType * load)4744 void InstructionCodeGeneratorARM64::VisitLoadMethodType(HLoadMethodType* load) {
4745 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
4746 }
4747
GetExceptionTlsAddress()4748 static MemOperand GetExceptionTlsAddress() {
4749 return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
4750 }
4751
VisitLoadException(HLoadException * load)4752 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
4753 LocationSummary* locations =
4754 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
4755 locations->SetOut(Location::RequiresRegister());
4756 }
4757
VisitLoadException(HLoadException * instruction)4758 void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) {
4759 __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress());
4760 }
4761
VisitClearException(HClearException * clear)4762 void LocationsBuilderARM64::VisitClearException(HClearException* clear) {
4763 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
4764 }
4765
VisitClearException(HClearException * clear ATTRIBUTE_UNUSED)4766 void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
4767 __ Str(wzr, GetExceptionTlsAddress());
4768 }
4769
GetSupportedLoadStringKind(HLoadString::LoadKind desired_string_load_kind)4770 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
4771 HLoadString::LoadKind desired_string_load_kind) {
4772 switch (desired_string_load_kind) {
4773 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
4774 case HLoadString::LoadKind::kBootImageRelRo:
4775 case HLoadString::LoadKind::kBssEntry:
4776 DCHECK(!Runtime::Current()->UseJitCompilation());
4777 break;
4778 case HLoadString::LoadKind::kJitBootImageAddress:
4779 case HLoadString::LoadKind::kJitTableAddress:
4780 DCHECK(Runtime::Current()->UseJitCompilation());
4781 break;
4782 case HLoadString::LoadKind::kRuntimeCall:
4783 break;
4784 }
4785 return desired_string_load_kind;
4786 }
4787
VisitLoadString(HLoadString * load)4788 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
4789 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
4790 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
4791 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
4792 InvokeRuntimeCallingConvention calling_convention;
4793 locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
4794 } else {
4795 locations->SetOut(Location::RequiresRegister());
4796 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
4797 if (!kUseReadBarrier || kUseBakerReadBarrier) {
4798 // Rely on the pResolveString and marking to save everything we need.
4799 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
4800 } else {
4801 // For non-Baker read barrier we have a temp-clobbering call.
4802 }
4803 }
4804 }
4805 }
4806
// Generates the code that materializes the string referenced by `load` in its output register.
//
// Each inline load kind below emits its code and returns directly. Any load kind not handled
// by the switch falls through to a call to the kQuickResolveString entrypoint with the string
// index as argument.
//
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
// move.
void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
  Register out = OutputRegister(load);
  Location out_loc = load->GetLocations()->Out();

  switch (load->GetLoadKind()) {
    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
      // Add ADRP with its PC-relative String patch.
      const DexFile& dex_file = load->GetDexFile();
      const dex::StringIndex string_index = load->GetStringIndex();
      vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index);
      codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
      // Add ADD with its PC-relative String patch.
      vixl::aarch64::Label* add_label =
          codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label);
      codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
      return;
    }
    case HLoadString::LoadKind::kBootImageRelRo: {
      DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
      // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
      uint32_t boot_image_offset = codegen_->GetBootImageOffset(load);
      vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset);
      codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
      // Add LDR with its PC-relative .data.bimg.rel.ro patch.
      vixl::aarch64::Label* ldr_label =
          codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label);
      codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
      return;
    }
    case HLoadString::LoadKind::kBssEntry: {
      // Add ADRP with its PC-relative String .bss entry patch.
      const DexFile& dex_file = load->GetDexFile();
      const dex::StringIndex string_index = load->GetStringIndex();
      // The X view of the output register holds the ADRP result used as the load base.
      Register temp = XRegisterFrom(out_loc);
      vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index);
      codegen_->EmitAdrpPlaceholder(adrp_label, temp);
      // Add LDR with its PC-relative String .bss entry patch.
      vixl::aarch64::Label* ldr_label =
          codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label);
      // /* GcRoot<mirror::String> */ out = *(base_address + offset)  /* PC-relative */
      codegen_->GenerateGcRootFieldLoad(load,
                                        out_loc,
                                        temp,
                                        /* offset placeholder */ 0u,
                                        ldr_label,
                                        kCompilerReadBarrierOption);
      // A null value loaded from the .bss entry branches to the slow path, which rejoins
      // at the exit label bound right after the branch.
      SlowPathCodeARM64* slow_path =
          new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load);
      codegen_->AddSlowPath(slow_path);
      __ Cbz(out.X(), slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
      return;
    }
    case HLoadString::LoadKind::kJitBootImageAddress: {
      // The string address is loaded as a 32-bit value from a deduplicated literal.
      uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
      DCHECK_NE(address, 0u);
      __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
      return;
    }
    case HLoadString::LoadKind::kJitTableAddress: {
      // Load the literal created by DeduplicateJitStringLiteral() into `out`, then load the
      // GC root through it (offset 0).
      __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
                                                        load->GetStringIndex(),
                                                        load->GetString()));
      codegen_->GenerateGcRootFieldLoad(load,
                                        out_loc,
                                        out.X(),
                                        /* offset= */ 0,
                                        /* fixup_label= */ nullptr,
                                        kCompilerReadBarrierOption);
      return;
    }
    default:
      break;
  }

  // TODO: Re-add the compiler code to do string dex cache lookup again.
  InvokeRuntimeCallingConvention calling_convention;
  // The output register is expected to be the first runtime argument register.
  DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
  __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
  codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
  CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
  codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
4894
VisitLongConstant(HLongConstant * constant)4895 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
4896 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4897 locations->SetOut(Location::ConstantLocation(constant));
4898 }
4899
VisitLongConstant(HLongConstant * constant ATTRIBUTE_UNUSED)4900 void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
4901 // Will be generated at use site.
4902 }
4903
VisitMonitorOperation(HMonitorOperation * instruction)4904 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
4905 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4906 instruction, LocationSummary::kCallOnMainOnly);
4907 InvokeRuntimeCallingConvention calling_convention;
4908 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4909 }
4910
VisitMonitorOperation(HMonitorOperation * instruction)4911 void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
4912 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
4913 instruction,
4914 instruction->GetDexPc());
4915 if (instruction->IsEnter()) {
4916 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
4917 } else {
4918 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
4919 }
4920 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4921 }
4922
VisitMul(HMul * mul)4923 void LocationsBuilderARM64::VisitMul(HMul* mul) {
4924 LocationSummary* locations =
4925 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
4926 switch (mul->GetResultType()) {
4927 case DataType::Type::kInt32:
4928 case DataType::Type::kInt64:
4929 locations->SetInAt(0, Location::RequiresRegister());
4930 locations->SetInAt(1, Location::RequiresRegister());
4931 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4932 break;
4933
4934 case DataType::Type::kFloat32:
4935 case DataType::Type::kFloat64:
4936 locations->SetInAt(0, Location::RequiresFpuRegister());
4937 locations->SetInAt(1, Location::RequiresFpuRegister());
4938 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4939 break;
4940
4941 default:
4942 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4943 }
4944 }
4945
VisitMul(HMul * mul)4946 void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) {
4947 switch (mul->GetResultType()) {
4948 case DataType::Type::kInt32:
4949 case DataType::Type::kInt64:
4950 __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
4951 break;
4952
4953 case DataType::Type::kFloat32:
4954 case DataType::Type::kFloat64:
4955 __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1));
4956 break;
4957
4958 default:
4959 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4960 }
4961 }
4962
VisitNeg(HNeg * neg)4963 void LocationsBuilderARM64::VisitNeg(HNeg* neg) {
4964 LocationSummary* locations =
4965 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
4966 switch (neg->GetResultType()) {
4967 case DataType::Type::kInt32:
4968 case DataType::Type::kInt64:
4969 locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg));
4970 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4971 break;
4972
4973 case DataType::Type::kFloat32:
4974 case DataType::Type::kFloat64:
4975 locations->SetInAt(0, Location::RequiresFpuRegister());
4976 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4977 break;
4978
4979 default:
4980 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
4981 }
4982 }
4983
VisitNeg(HNeg * neg)4984 void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) {
4985 switch (neg->GetResultType()) {
4986 case DataType::Type::kInt32:
4987 case DataType::Type::kInt64:
4988 __ Neg(OutputRegister(neg), InputOperandAt(neg, 0));
4989 break;
4990
4991 case DataType::Type::kFloat32:
4992 case DataType::Type::kFloat64:
4993 __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0));
4994 break;
4995
4996 default:
4997 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
4998 }
4999 }
5000
VisitNewArray(HNewArray * instruction)5001 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
5002 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5003 instruction, LocationSummary::kCallOnMainOnly);
5004 InvokeRuntimeCallingConvention calling_convention;
5005 locations->SetOut(LocationFrom(x0));
5006 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5007 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5008 }
5009
VisitNewArray(HNewArray * instruction)5010 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
5011 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5012 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5013 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5014 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5015 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5016 }
5017
VisitNewInstance(HNewInstance * instruction)5018 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
5019 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5020 instruction, LocationSummary::kCallOnMainOnly);
5021 InvokeRuntimeCallingConvention calling_convention;
5022 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5023 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
5024 }
5025
VisitNewInstance(HNewInstance * instruction)5026 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
5027 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5028 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5029 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5030 }
5031
VisitNot(HNot * instruction)5032 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
5033 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5034 locations->SetInAt(0, Location::RequiresRegister());
5035 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5036 }
5037
VisitNot(HNot * instruction)5038 void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
5039 switch (instruction->GetResultType()) {
5040 case DataType::Type::kInt32:
5041 case DataType::Type::kInt64:
5042 __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0));
5043 break;
5044
5045 default:
5046 LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
5047 }
5048 }
5049
VisitBooleanNot(HBooleanNot * instruction)5050 void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
5051 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5052 locations->SetInAt(0, Location::RequiresRegister());
5053 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5054 }
5055
VisitBooleanNot(HBooleanNot * instruction)5056 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
5057 __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
5058 }
5059
VisitNullCheck(HNullCheck * instruction)5060 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
5061 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5062 locations->SetInAt(0, Location::RequiresRegister());
5063 }
5064
GenerateImplicitNullCheck(HNullCheck * instruction)5065 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5066 if (CanMoveNullCheckToUser(instruction)) {
5067 return;
5068 }
5069 {
5070 // Ensure that between load and RecordPcInfo there are no pools emitted.
5071 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5072 Location obj = instruction->GetLocations()->InAt(0);
5073 __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
5074 RecordPcInfo(instruction, instruction->GetDexPc());
5075 }
5076 }
5077
GenerateExplicitNullCheck(HNullCheck * instruction)5078 void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5079 SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathARM64(instruction);
5080 AddSlowPath(slow_path);
5081
5082 LocationSummary* locations = instruction->GetLocations();
5083 Location obj = locations->InAt(0);
5084
5085 __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
5086 }
5087
VisitNullCheck(HNullCheck * instruction)5088 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
5089 codegen_->GenerateNullCheck(instruction);
5090 }
5091
VisitOr(HOr * instruction)5092 void LocationsBuilderARM64::VisitOr(HOr* instruction) {
5093 HandleBinaryOp(instruction);
5094 }
5095
VisitOr(HOr * instruction)5096 void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) {
5097 HandleBinaryOp(instruction);
5098 }
5099
VisitParallelMove(HParallelMove * instruction ATTRIBUTE_UNUSED)5100 void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5101 LOG(FATAL) << "Unreachable";
5102 }
5103
VisitParallelMove(HParallelMove * instruction)5104 void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) {
5105 if (instruction->GetNext()->IsSuspendCheck() &&
5106 instruction->GetBlock()->GetLoopInformation() != nullptr) {
5107 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5108 // The back edge will generate the suspend check.
5109 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5110 }
5111
5112 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5113 }
5114
VisitParameterValue(HParameterValue * instruction)5115 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
5116 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5117 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5118 if (location.IsStackSlot()) {
5119 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5120 } else if (location.IsDoubleStackSlot()) {
5121 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5122 }
5123 locations->SetOut(location);
5124 }
5125
VisitParameterValue(HParameterValue * instruction ATTRIBUTE_UNUSED)5126 void InstructionCodeGeneratorARM64::VisitParameterValue(
5127 HParameterValue* instruction ATTRIBUTE_UNUSED) {
5128 // Nothing to do, the parameter is already at its location.
5129 }
5130
VisitCurrentMethod(HCurrentMethod * instruction)5131 void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) {
5132 LocationSummary* locations =
5133 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5134 locations->SetOut(LocationFrom(kArtMethodRegister));
5135 }
5136
VisitCurrentMethod(HCurrentMethod * instruction ATTRIBUTE_UNUSED)5137 void InstructionCodeGeneratorARM64::VisitCurrentMethod(
5138 HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
5139 // Nothing to do, the method is already at its location.
5140 }
5141
VisitPhi(HPhi * instruction)5142 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
5143 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5144 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5145 locations->SetInAt(i, Location::Any());
5146 }
5147 locations->SetOut(Location::Any());
5148 }
5149
VisitPhi(HPhi * instruction ATTRIBUTE_UNUSED)5150 void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
5151 LOG(FATAL) << "Unreachable";
5152 }
5153
VisitRem(HRem * rem)5154 void LocationsBuilderARM64::VisitRem(HRem* rem) {
5155 DataType::Type type = rem->GetResultType();
5156 LocationSummary::CallKind call_kind =
5157 DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
5158 : LocationSummary::kNoCall;
5159 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
5160
5161 switch (type) {
5162 case DataType::Type::kInt32:
5163 case DataType::Type::kInt64:
5164 locations->SetInAt(0, Location::RequiresRegister());
5165 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
5166 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5167 break;
5168
5169 case DataType::Type::kFloat32:
5170 case DataType::Type::kFloat64: {
5171 InvokeRuntimeCallingConvention calling_convention;
5172 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
5173 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
5174 locations->SetOut(calling_convention.GetReturnLocation(type));
5175
5176 break;
5177 }
5178
5179 default:
5180 LOG(FATAL) << "Unexpected rem type " << type;
5181 }
5182 }
5183
GenerateIntRemForPower2Denom(HRem * instruction)5184 void InstructionCodeGeneratorARM64::GenerateIntRemForPower2Denom(HRem *instruction) {
5185 int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
5186 uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
5187 DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;
5188
5189 Register out = OutputRegister(instruction);
5190 Register dividend = InputRegisterAt(instruction, 0);
5191
5192 if (abs_imm == 2) {
5193 __ Cmp(dividend, 0);
5194 __ And(out, dividend, 1);
5195 __ Csneg(out, out, out, ge);
5196 } else {
5197 UseScratchRegisterScope temps(GetVIXLAssembler());
5198 Register temp = temps.AcquireSameSizeAs(out);
5199
5200 __ Negs(temp, dividend);
5201 __ And(out, dividend, abs_imm - 1);
5202 __ And(temp, temp, abs_imm - 1);
5203 __ Csneg(out, out, temp, mi);
5204 }
5205 }
5206
GenerateIntRemForConstDenom(HRem * instruction)5207 void InstructionCodeGeneratorARM64::GenerateIntRemForConstDenom(HRem *instruction) {
5208 int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
5209
5210 if (imm == 0) {
5211 // Do not generate anything.
5212 // DivZeroCheck would prevent any code to be executed.
5213 return;
5214 }
5215
5216 if (IsPowerOfTwo(AbsOrMin(imm))) {
5217 // Cases imm == -1 or imm == 1 are handled in constant folding by
5218 // InstructionWithAbsorbingInputSimplifier.
5219 // If the cases have survided till code generation they are handled in
5220 // GenerateIntRemForPower2Denom becauses -1 and 1 are the power of 2 (2^0).
5221 // The correct code is generated for them, just more instructions.
5222 GenerateIntRemForPower2Denom(instruction);
5223 } else {
5224 DCHECK(imm < -2 || imm > 2) << imm;
5225 GenerateDivRemWithAnyConstant(instruction);
5226 }
5227 }
5228
GenerateIntRem(HRem * instruction)5229 void InstructionCodeGeneratorARM64::GenerateIntRem(HRem* instruction) {
5230 DCHECK(DataType::IsIntOrLongType(instruction->GetResultType()))
5231 << instruction->GetResultType();
5232
5233 if (instruction->GetLocations()->InAt(1).IsConstant()) {
5234 GenerateIntRemForConstDenom(instruction);
5235 } else {
5236 Register out = OutputRegister(instruction);
5237 Register dividend = InputRegisterAt(instruction, 0);
5238 Register divisor = InputRegisterAt(instruction, 1);
5239 UseScratchRegisterScope temps(GetVIXLAssembler());
5240 Register temp = temps.AcquireSameSizeAs(out);
5241 __ Sdiv(temp, dividend, divisor);
5242 __ Msub(out, temp, divisor, dividend);
5243 }
5244 }
5245
VisitRem(HRem * rem)5246 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
5247 DataType::Type type = rem->GetResultType();
5248
5249 switch (type) {
5250 case DataType::Type::kInt32:
5251 case DataType::Type::kInt64: {
5252 GenerateIntRem(rem);
5253 break;
5254 }
5255
5256 case DataType::Type::kFloat32:
5257 case DataType::Type::kFloat64: {
5258 QuickEntrypointEnum entrypoint =
5259 (type == DataType::Type::kFloat32) ? kQuickFmodf : kQuickFmod;
5260 codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc());
5261 if (type == DataType::Type::kFloat32) {
5262 CheckEntrypointTypes<kQuickFmodf, float, float, float>();
5263 } else {
5264 CheckEntrypointTypes<kQuickFmod, double, double, double>();
5265 }
5266 break;
5267 }
5268
5269 default:
5270 LOG(FATAL) << "Unexpected rem type " << type;
5271 UNREACHABLE();
5272 }
5273 }
5274
VisitMin(HMin * min)5275 void LocationsBuilderARM64::VisitMin(HMin* min) {
5276 HandleBinaryOp(min);
5277 }
5278
VisitMin(HMin * min)5279 void InstructionCodeGeneratorARM64::VisitMin(HMin* min) {
5280 HandleBinaryOp(min);
5281 }
5282
VisitMax(HMax * max)5283 void LocationsBuilderARM64::VisitMax(HMax* max) {
5284 HandleBinaryOp(max);
5285 }
5286
VisitMax(HMax * max)5287 void InstructionCodeGeneratorARM64::VisitMax(HMax* max) {
5288 HandleBinaryOp(max);
5289 }
5290
VisitAbs(HAbs * abs)5291 void LocationsBuilderARM64::VisitAbs(HAbs* abs) {
5292 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
5293 switch (abs->GetResultType()) {
5294 case DataType::Type::kInt32:
5295 case DataType::Type::kInt64:
5296 locations->SetInAt(0, Location::RequiresRegister());
5297 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5298 break;
5299 case DataType::Type::kFloat32:
5300 case DataType::Type::kFloat64:
5301 locations->SetInAt(0, Location::RequiresFpuRegister());
5302 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5303 break;
5304 default:
5305 LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5306 }
5307 }
5308
VisitAbs(HAbs * abs)5309 void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) {
5310 switch (abs->GetResultType()) {
5311 case DataType::Type::kInt32:
5312 case DataType::Type::kInt64: {
5313 Register in_reg = InputRegisterAt(abs, 0);
5314 Register out_reg = OutputRegister(abs);
5315 __ Cmp(in_reg, Operand(0));
5316 __ Cneg(out_reg, in_reg, lt);
5317 break;
5318 }
5319 case DataType::Type::kFloat32:
5320 case DataType::Type::kFloat64: {
5321 FPRegister in_reg = InputFPRegisterAt(abs, 0);
5322 FPRegister out_reg = OutputFPRegister(abs);
5323 __ Fabs(out_reg, in_reg);
5324 break;
5325 }
5326 default:
5327 LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5328 }
5329 }
5330
VisitConstructorFence(HConstructorFence * constructor_fence)5331 void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) {
5332 constructor_fence->SetLocations(nullptr);
5333 }
5334
VisitConstructorFence(HConstructorFence * constructor_fence ATTRIBUTE_UNUSED)5335 void InstructionCodeGeneratorARM64::VisitConstructorFence(
5336 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
5337 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
5338 }
5339
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)5340 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
5341 memory_barrier->SetLocations(nullptr);
5342 }
5343
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)5344 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
5345 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
5346 }
5347
VisitReturn(HReturn * instruction)5348 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
5349 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5350 DataType::Type return_type = instruction->InputAt(0)->GetType();
5351 locations->SetInAt(0, ARM64ReturnLocation(return_type));
5352 }
5353
VisitReturn(HReturn * instruction ATTRIBUTE_UNUSED)5354 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction ATTRIBUTE_UNUSED) {
5355 codegen_->GenerateFrameExit();
5356 }
5357
VisitReturnVoid(HReturnVoid * instruction)5358 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
5359 instruction->SetLocations(nullptr);
5360 }
5361
VisitReturnVoid(HReturnVoid * instruction ATTRIBUTE_UNUSED)5362 void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) {
5363 codegen_->GenerateFrameExit();
5364 }
5365
VisitRor(HRor * ror)5366 void LocationsBuilderARM64::VisitRor(HRor* ror) {
5367 HandleBinaryOp(ror);
5368 }
5369
VisitRor(HRor * ror)5370 void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
5371 HandleBinaryOp(ror);
5372 }
5373
VisitShl(HShl * shl)5374 void LocationsBuilderARM64::VisitShl(HShl* shl) {
5375 HandleShift(shl);
5376 }
5377
VisitShl(HShl * shl)5378 void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) {
5379 HandleShift(shl);
5380 }
5381
VisitShr(HShr * shr)5382 void LocationsBuilderARM64::VisitShr(HShr* shr) {
5383 HandleShift(shr);
5384 }
5385
VisitShr(HShr * shr)5386 void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) {
5387 HandleShift(shr);
5388 }
5389
VisitSub(HSub * instruction)5390 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
5391 HandleBinaryOp(instruction);
5392 }
5393
VisitSub(HSub * instruction)5394 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
5395 HandleBinaryOp(instruction);
5396 }
5397
VisitStaticFieldGet(HStaticFieldGet * instruction)5398 void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5399 HandleFieldGet(instruction, instruction->GetFieldInfo());
5400 }
5401
VisitStaticFieldGet(HStaticFieldGet * instruction)5402 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5403 HandleFieldGet(instruction, instruction->GetFieldInfo());
5404 }
5405
VisitStaticFieldSet(HStaticFieldSet * instruction)5406 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5407 HandleFieldSet(instruction);
5408 }
5409
VisitStaticFieldSet(HStaticFieldSet * instruction)5410 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5411 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5412 }
5413
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)5414 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet(
5415 HUnresolvedInstanceFieldGet* instruction) {
5416 FieldAccessCallingConventionARM64 calling_convention;
5417 codegen_->CreateUnresolvedFieldLocationSummary(
5418 instruction, instruction->GetFieldType(), calling_convention);
5419 }
5420
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)5421 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet(
5422 HUnresolvedInstanceFieldGet* instruction) {
5423 FieldAccessCallingConventionARM64 calling_convention;
5424 codegen_->GenerateUnresolvedFieldAccess(instruction,
5425 instruction->GetFieldType(),
5426 instruction->GetFieldIndex(),
5427 instruction->GetDexPc(),
5428 calling_convention);
5429 }
5430
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)5431 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet(
5432 HUnresolvedInstanceFieldSet* instruction) {
5433 FieldAccessCallingConventionARM64 calling_convention;
5434 codegen_->CreateUnresolvedFieldLocationSummary(
5435 instruction, instruction->GetFieldType(), calling_convention);
5436 }
5437
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)5438 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet(
5439 HUnresolvedInstanceFieldSet* instruction) {
5440 FieldAccessCallingConventionARM64 calling_convention;
5441 codegen_->GenerateUnresolvedFieldAccess(instruction,
5442 instruction->GetFieldType(),
5443 instruction->GetFieldIndex(),
5444 instruction->GetDexPc(),
5445 calling_convention);
5446 }
5447
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)5448 void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet(
5449 HUnresolvedStaticFieldGet* instruction) {
5450 FieldAccessCallingConventionARM64 calling_convention;
5451 codegen_->CreateUnresolvedFieldLocationSummary(
5452 instruction, instruction->GetFieldType(), calling_convention);
5453 }
5454
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)5455 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet(
5456 HUnresolvedStaticFieldGet* instruction) {
5457 FieldAccessCallingConventionARM64 calling_convention;
5458 codegen_->GenerateUnresolvedFieldAccess(instruction,
5459 instruction->GetFieldType(),
5460 instruction->GetFieldIndex(),
5461 instruction->GetDexPc(),
5462 calling_convention);
5463 }
5464
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)5465 void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet(
5466 HUnresolvedStaticFieldSet* instruction) {
5467 FieldAccessCallingConventionARM64 calling_convention;
5468 codegen_->CreateUnresolvedFieldLocationSummary(
5469 instruction, instruction->GetFieldType(), calling_convention);
5470 }
5471
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)5472 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
5473 HUnresolvedStaticFieldSet* instruction) {
5474 FieldAccessCallingConventionARM64 calling_convention;
5475 codegen_->GenerateUnresolvedFieldAccess(instruction,
5476 instruction->GetFieldType(),
5477 instruction->GetFieldIndex(),
5478 instruction->GetDexPc(),
5479 calling_convention);
5480 }
5481
VisitSuspendCheck(HSuspendCheck * instruction)5482 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
5483 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5484 instruction, LocationSummary::kCallOnSlowPath);
5485 // In suspend check slow path, usually there are no caller-save registers at all.
5486 // If SIMD instructions are present, however, we force spilling all live SIMD
5487 // registers in full width (since the runtime only saves/restores lower part).
5488 locations->SetCustomSlowPathCallerSaves(
5489 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5490 }
5491
VisitSuspendCheck(HSuspendCheck * instruction)5492 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
5493 HBasicBlock* block = instruction->GetBlock();
5494 if (block->GetLoopInformation() != nullptr) {
5495 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5496 // The back edge will generate the suspend check.
5497 return;
5498 }
5499 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5500 // The goto will generate the suspend check.
5501 return;
5502 }
5503 GenerateSuspendCheck(instruction, nullptr);
5504 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5505 }
5506
VisitThrow(HThrow * instruction)5507 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
5508 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5509 instruction, LocationSummary::kCallOnMainOnly);
5510 InvokeRuntimeCallingConvention calling_convention;
5511 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5512 }
5513
VisitThrow(HThrow * instruction)5514 void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
5515 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
5516 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
5517 }
5518
VisitTypeConversion(HTypeConversion * conversion)5519 void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) {
5520 LocationSummary* locations =
5521 new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
5522 DataType::Type input_type = conversion->GetInputType();
5523 DataType::Type result_type = conversion->GetResultType();
5524 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
5525 << input_type << " -> " << result_type;
5526 if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) ||
5527 (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) {
5528 LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
5529 }
5530
5531 if (DataType::IsFloatingPointType(input_type)) {
5532 locations->SetInAt(0, Location::RequiresFpuRegister());
5533 } else {
5534 locations->SetInAt(0, Location::RequiresRegister());
5535 }
5536
5537 if (DataType::IsFloatingPointType(result_type)) {
5538 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5539 } else {
5540 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5541 }
5542 }
5543
VisitTypeConversion(HTypeConversion * conversion)5544 void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) {
5545 DataType::Type result_type = conversion->GetResultType();
5546 DataType::Type input_type = conversion->GetInputType();
5547
5548 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
5549 << input_type << " -> " << result_type;
5550
5551 if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) {
5552 int result_size = DataType::Size(result_type);
5553 int input_size = DataType::Size(input_type);
5554 int min_size = std::min(result_size, input_size);
5555 Register output = OutputRegister(conversion);
5556 Register source = InputRegisterAt(conversion, 0);
5557 if (result_type == DataType::Type::kInt32 && input_type == DataType::Type::kInt64) {
5558 // 'int' values are used directly as W registers, discarding the top
5559 // bits, so we don't need to sign-extend and can just perform a move.
5560 // We do not pass the `kDiscardForSameWReg` argument to force clearing the
5561 // top 32 bits of the target register. We theoretically could leave those
5562 // bits unchanged, but we would have to make sure that no code uses a
5563 // 32bit input value as a 64bit value assuming that the top 32 bits are
5564 // zero.
5565 __ Mov(output.W(), source.W());
5566 } else if (DataType::IsUnsignedType(result_type) ||
5567 (DataType::IsUnsignedType(input_type) && input_size < result_size)) {
5568 __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, result_size * kBitsPerByte);
5569 } else {
5570 __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
5571 }
5572 } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) {
5573 __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
5574 } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) {
5575 CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64);
5576 __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
5577 } else if (DataType::IsFloatingPointType(result_type) &&
5578 DataType::IsFloatingPointType(input_type)) {
5579 __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
5580 } else {
5581 LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
5582 << " to " << result_type;
5583 }
5584 }
5585
VisitUShr(HUShr * ushr)5586 void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
5587 HandleShift(ushr);
5588 }
5589
VisitUShr(HUShr * ushr)5590 void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
5591 HandleShift(ushr);
5592 }
5593
VisitXor(HXor * instruction)5594 void LocationsBuilderARM64::VisitXor(HXor* instruction) {
5595 HandleBinaryOp(instruction);
5596 }
5597
VisitXor(HXor * instruction)5598 void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
5599 HandleBinaryOp(instruction);
5600 }
5601
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)5602 void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
5603 // Nothing to do, this should be removed during prepare for register allocator.
5604 LOG(FATAL) << "Unreachable";
5605 }
5606
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)5607 void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
5608 // Nothing to do, this should be removed during prepare for register allocator.
5609 LOG(FATAL) << "Unreachable";
5610 }
5611
5612 // Simple implementation of packed switch - generate cascaded compare/jumps.
VisitPackedSwitch(HPackedSwitch * switch_instr)5613 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
5614 LocationSummary* locations =
5615 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
5616 locations->SetInAt(0, Location::RequiresRegister());
5617 }
5618
VisitPackedSwitch(HPackedSwitch * switch_instr)5619 void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
5620 int32_t lower_bound = switch_instr->GetStartValue();
5621 uint32_t num_entries = switch_instr->GetNumEntries();
5622 Register value_reg = InputRegisterAt(switch_instr, 0);
5623 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
5624
5625 // Roughly set 16 as max average assemblies generated per HIR in a graph.
5626 static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
5627 // ADR has a limited range(+/-1MB), so we set a threshold for the number of HIRs in the graph to
5628 // make sure we don't emit it if the target may run out of range.
5629 // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
5630 // ranges and emit the tables only as required.
5631 static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction;
5632
5633 if (num_entries <= kPackedSwitchCompareJumpThreshold ||
5634 // Current instruction id is an upper bound of the number of HIRs in the graph.
5635 GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
5636 // Create a series of compare/jumps.
5637 UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
5638 Register temp = temps.AcquireW();
5639 __ Subs(temp, value_reg, Operand(lower_bound));
5640
5641 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
5642 // Jump to successors[0] if value == lower_bound.
5643 __ B(eq, codegen_->GetLabelOf(successors[0]));
5644 int32_t last_index = 0;
5645 for (; num_entries - last_index > 2; last_index += 2) {
5646 __ Subs(temp, temp, Operand(2));
5647 // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
5648 __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
5649 // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
5650 __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
5651 }
5652 if (num_entries - last_index == 2) {
5653 // The last missing case_value.
5654 __ Cmp(temp, Operand(1));
5655 __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
5656 }
5657
5658 // And the default for any other value.
5659 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
5660 __ B(codegen_->GetLabelOf(default_block));
5661 }
5662 } else {
5663 JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr);
5664
5665 UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
5666
5667 // Below instructions should use at most one blocked register. Since there are two blocked
5668 // registers, we are free to block one.
5669 Register temp_w = temps.AcquireW();
5670 Register index;
5671 // Remove the bias.
5672 if (lower_bound != 0) {
5673 index = temp_w;
5674 __ Sub(index, value_reg, Operand(lower_bound));
5675 } else {
5676 index = value_reg;
5677 }
5678
5679 // Jump to default block if index is out of the range.
5680 __ Cmp(index, Operand(num_entries));
5681 __ B(hs, codegen_->GetLabelOf(default_block));
5682
5683 // In current VIXL implementation, it won't require any blocked registers to encode the
5684 // immediate value for Adr. So we are free to use both VIXL blocked registers to reduce the
5685 // register pressure.
5686 Register table_base = temps.AcquireX();
5687 // Load jump offset from the table.
5688 __ Adr(table_base, jump_table->GetTableStartLabel());
5689 Register jump_offset = temp_w;
5690 __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
5691
5692 // Jump to target block by branching to table_base(pc related) + offset.
5693 Register target_address = table_base;
5694 __ Add(target_address, table_base, Operand(jump_offset, SXTW));
5695 __ Br(target_address);
5696 }
5697 }
5698
GenerateReferenceLoadOneRegister(HInstruction * instruction,Location out,uint32_t offset,Location maybe_temp,ReadBarrierOption read_barrier_option)5699 void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
5700 HInstruction* instruction,
5701 Location out,
5702 uint32_t offset,
5703 Location maybe_temp,
5704 ReadBarrierOption read_barrier_option) {
5705 DataType::Type type = DataType::Type::kReference;
5706 Register out_reg = RegisterFrom(out, type);
5707 if (read_barrier_option == kWithReadBarrier) {
5708 CHECK(kEmitCompilerReadBarrier);
5709 if (kUseBakerReadBarrier) {
5710 // Load with fast path based Baker's read barrier.
5711 // /* HeapReference<Object> */ out = *(out + offset)
5712 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
5713 out,
5714 out_reg,
5715 offset,
5716 maybe_temp,
5717 /* needs_null_check= */ false,
5718 /* use_load_acquire= */ false);
5719 } else {
5720 // Load with slow path based read barrier.
5721 // Save the value of `out` into `maybe_temp` before overwriting it
5722 // in the following move operation, as we will need it for the
5723 // read barrier below.
5724 Register temp_reg = RegisterFrom(maybe_temp, type);
5725 __ Mov(temp_reg, out_reg);
5726 // /* HeapReference<Object> */ out = *(out + offset)
5727 __ Ldr(out_reg, HeapOperand(out_reg, offset));
5728 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
5729 }
5730 } else {
5731 // Plain load with no read barrier.
5732 // /* HeapReference<Object> */ out = *(out + offset)
5733 __ Ldr(out_reg, HeapOperand(out_reg, offset));
5734 GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
5735 }
5736 }
5737
GenerateReferenceLoadTwoRegisters(HInstruction * instruction,Location out,Location obj,uint32_t offset,Location maybe_temp,ReadBarrierOption read_barrier_option)5738 void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
5739 HInstruction* instruction,
5740 Location out,
5741 Location obj,
5742 uint32_t offset,
5743 Location maybe_temp,
5744 ReadBarrierOption read_barrier_option) {
5745 DataType::Type type = DataType::Type::kReference;
5746 Register out_reg = RegisterFrom(out, type);
5747 Register obj_reg = RegisterFrom(obj, type);
5748 if (read_barrier_option == kWithReadBarrier) {
5749 CHECK(kEmitCompilerReadBarrier);
5750 if (kUseBakerReadBarrier) {
5751 // Load with fast path based Baker's read barrier.
5752 // /* HeapReference<Object> */ out = *(obj + offset)
5753 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
5754 out,
5755 obj_reg,
5756 offset,
5757 maybe_temp,
5758 /* needs_null_check= */ false,
5759 /* use_load_acquire= */ false);
5760 } else {
5761 // Load with slow path based read barrier.
5762 // /* HeapReference<Object> */ out = *(obj + offset)
5763 __ Ldr(out_reg, HeapOperand(obj_reg, offset));
5764 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
5765 }
5766 } else {
5767 // Plain load with no read barrier.
5768 // /* HeapReference<Object> */ out = *(obj + offset)
5769 __ Ldr(out_reg, HeapOperand(obj_reg, offset));
5770 GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
5771 }
5772 }
5773
GenerateGcRootFieldLoad(HInstruction * instruction,Location root,Register obj,uint32_t offset,vixl::aarch64::Label * fixup_label,ReadBarrierOption read_barrier_option)5774 void CodeGeneratorARM64::GenerateGcRootFieldLoad(
5775 HInstruction* instruction,
5776 Location root,
5777 Register obj,
5778 uint32_t offset,
5779 vixl::aarch64::Label* fixup_label,
5780 ReadBarrierOption read_barrier_option) {
5781 DCHECK(fixup_label == nullptr || offset == 0u);
5782 Register root_reg = RegisterFrom(root, DataType::Type::kReference);
5783 if (read_barrier_option == kWithReadBarrier) {
5784 DCHECK(kEmitCompilerReadBarrier);
5785 if (kUseBakerReadBarrier) {
5786 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
5787 // Baker's read barrier are used.
5788
5789 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
5790 // the Marking Register) to decide whether we need to enter
5791 // the slow path to mark the GC root.
5792 //
5793 // We use shared thunks for the slow path; shared within the method
5794 // for JIT, across methods for AOT. That thunk checks the reference
5795 // and jumps to the entrypoint if needed.
5796 //
5797 // lr = &return_address;
5798 // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
5799 // if (mr) { // Thread::Current()->GetIsGcMarking()
5800 // goto gc_root_thunk<root_reg>(lr)
5801 // }
5802 // return_address:
5803
5804 UseScratchRegisterScope temps(GetVIXLAssembler());
5805 DCHECK(temps.IsAvailable(ip0));
5806 DCHECK(temps.IsAvailable(ip1));
5807 temps.Exclude(ip0, ip1);
5808 uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
5809
5810 ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
5811 vixl::aarch64::Label return_address;
5812 __ adr(lr, &return_address);
5813 if (fixup_label != nullptr) {
5814 __ bind(fixup_label);
5815 }
5816 static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
5817 "GC root LDR must be 2 instructions (8B) before the return address label.");
5818 __ ldr(root_reg, MemOperand(obj.X(), offset));
5819 EmitBakerReadBarrierCbnz(custom_data);
5820 __ bind(&return_address);
5821 } else {
5822 // GC root loaded through a slow path for read barriers other
5823 // than Baker's.
5824 // /* GcRoot<mirror::Object>* */ root = obj + offset
5825 if (fixup_label == nullptr) {
5826 __ Add(root_reg.X(), obj.X(), offset);
5827 } else {
5828 EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
5829 }
5830 // /* mirror::Object* */ root = root->Read()
5831 GenerateReadBarrierForRootSlow(instruction, root, root);
5832 }
5833 } else {
5834 // Plain GC root load with no read barrier.
5835 // /* GcRoot<mirror::Object> */ root = *(obj + offset)
5836 if (fixup_label == nullptr) {
5837 __ Ldr(root_reg, MemOperand(obj, offset));
5838 } else {
5839 EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
5840 }
5841 // Note that GC roots are not affected by heap poisoning, thus we
5842 // do not have to unpoison `root_reg` here.
5843 }
5844 MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5845 }
5846
GenerateUnsafeCasOldValueMovWithBakerReadBarrier(vixl::aarch64::Register marked,vixl::aarch64::Register old_value)5847 void CodeGeneratorARM64::GenerateUnsafeCasOldValueMovWithBakerReadBarrier(
5848 vixl::aarch64::Register marked,
5849 vixl::aarch64::Register old_value) {
5850 DCHECK(kEmitCompilerReadBarrier);
5851 DCHECK(kUseBakerReadBarrier);
5852
5853 // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
5854 uint32_t custom_data = EncodeBakerReadBarrierGcRootData(marked.GetCode());
5855
5856 ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
5857 vixl::aarch64::Label return_address;
5858 __ adr(lr, &return_address);
5859 static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
5860 "GC root LDR must be 2 instructions (8B) before the return address label.");
5861 __ mov(marked, old_value);
5862 EmitBakerReadBarrierCbnz(custom_data);
5863 __ bind(&return_address);
5864 }
5865
GenerateFieldLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,vixl::aarch64::Register obj,const vixl::aarch64::MemOperand & src,bool needs_null_check,bool use_load_acquire)5866 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
5867 Location ref,
5868 vixl::aarch64::Register obj,
5869 const vixl::aarch64::MemOperand& src,
5870 bool needs_null_check,
5871 bool use_load_acquire) {
5872 DCHECK(kEmitCompilerReadBarrier);
5873 DCHECK(kUseBakerReadBarrier);
5874
5875 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
5876 // Marking Register) to decide whether we need to enter the slow
5877 // path to mark the reference. Then, in the slow path, check the
5878 // gray bit in the lock word of the reference's holder (`obj`) to
5879 // decide whether to mark `ref` or not.
5880 //
5881 // We use shared thunks for the slow path; shared within the method
5882 // for JIT, across methods for AOT. That thunk checks the holder
5883 // and jumps to the entrypoint if needed. If the holder is not gray,
5884 // it creates a fake dependency and returns to the LDR instruction.
5885 //
5886 // lr = &gray_return_address;
5887 // if (mr) { // Thread::Current()->GetIsGcMarking()
5888 // goto field_thunk<holder_reg, base_reg, use_load_acquire>(lr)
5889 // }
5890 // not_gray_return_address:
5891 // // Original reference load. If the offset is too large to fit
5892 // // into LDR, we use an adjusted base register here.
5893 // HeapReference<mirror::Object> reference = *(obj+offset);
5894 // gray_return_address:
5895
5896 DCHECK(src.GetAddrMode() == vixl::aarch64::Offset);
5897 DCHECK_ALIGNED(src.GetOffset(), sizeof(mirror::HeapReference<mirror::Object>));
5898
5899 UseScratchRegisterScope temps(GetVIXLAssembler());
5900 DCHECK(temps.IsAvailable(ip0));
5901 DCHECK(temps.IsAvailable(ip1));
5902 temps.Exclude(ip0, ip1);
5903 uint32_t custom_data = use_load_acquire
5904 ? EncodeBakerReadBarrierAcquireData(src.GetBaseRegister().GetCode(), obj.GetCode())
5905 : EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode());
5906
5907 {
5908 ExactAssemblyScope guard(GetVIXLAssembler(),
5909 (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
5910 vixl::aarch64::Label return_address;
5911 __ adr(lr, &return_address);
5912 EmitBakerReadBarrierCbnz(custom_data);
5913 static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
5914 "Field LDR must be 1 instruction (4B) before the return address label; "
5915 " 2 instructions (8B) for heap poisoning.");
5916 Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
5917 if (use_load_acquire) {
5918 DCHECK_EQ(src.GetOffset(), 0);
5919 __ ldar(ref_reg, src);
5920 } else {
5921 __ ldr(ref_reg, src);
5922 }
5923 if (needs_null_check) {
5924 MaybeRecordImplicitNullCheck(instruction);
5925 }
5926 // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
5927 // macro instructions disallowed in ExactAssemblyScope.
5928 if (kPoisonHeapReferences) {
5929 __ neg(ref_reg, Operand(ref_reg));
5930 }
5931 __ bind(&return_address);
5932 }
5933 MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
5934 }
5935
GenerateFieldLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,uint32_t offset,Location maybe_temp,bool needs_null_check,bool use_load_acquire)5936 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
5937 Location ref,
5938 Register obj,
5939 uint32_t offset,
5940 Location maybe_temp,
5941 bool needs_null_check,
5942 bool use_load_acquire) {
5943 DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
5944 Register base = obj;
5945 if (use_load_acquire) {
5946 DCHECK(maybe_temp.IsRegister());
5947 base = WRegisterFrom(maybe_temp);
5948 __ Add(base, obj, offset);
5949 offset = 0u;
5950 } else if (offset >= kReferenceLoadMinFarOffset) {
5951 DCHECK(maybe_temp.IsRegister());
5952 base = WRegisterFrom(maybe_temp);
5953 static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
5954 __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
5955 offset &= (kReferenceLoadMinFarOffset - 1u);
5956 }
5957 MemOperand src(base.X(), offset);
5958 GenerateFieldLoadWithBakerReadBarrier(
5959 instruction, ref, obj, src, needs_null_check, use_load_acquire);
5960 }
5961
GenerateArrayLoadWithBakerReadBarrier(HArrayGet * instruction,Location ref,Register obj,uint32_t data_offset,Location index,bool needs_null_check)5962 void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction,
5963 Location ref,
5964 Register obj,
5965 uint32_t data_offset,
5966 Location index,
5967 bool needs_null_check) {
5968 DCHECK(kEmitCompilerReadBarrier);
5969 DCHECK(kUseBakerReadBarrier);
5970
5971 static_assert(
5972 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5973 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5974 size_t scale_factor = DataType::SizeShift(DataType::Type::kReference);
5975
5976 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
5977 // Marking Register) to decide whether we need to enter the slow
5978 // path to mark the reference. Then, in the slow path, check the
5979 // gray bit in the lock word of the reference's holder (`obj`) to
5980 // decide whether to mark `ref` or not.
5981 //
5982 // We use shared thunks for the slow path; shared within the method
5983 // for JIT, across methods for AOT. That thunk checks the holder
5984 // and jumps to the entrypoint if needed. If the holder is not gray,
5985 // it creates a fake dependency and returns to the LDR instruction.
5986 //
5987 // lr = &gray_return_address;
5988 // if (mr) { // Thread::Current()->GetIsGcMarking()
5989 // goto array_thunk<base_reg>(lr)
5990 // }
5991 // not_gray_return_address:
5992 // // Original reference load. If the offset is too large to fit
5993 // // into LDR, we use an adjusted base register here.
5994 // HeapReference<mirror::Object> reference = data[index];
5995 // gray_return_address:
5996
5997 DCHECK(index.IsValid());
5998 Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
5999 Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
6000
6001 UseScratchRegisterScope temps(GetVIXLAssembler());
6002 DCHECK(temps.IsAvailable(ip0));
6003 DCHECK(temps.IsAvailable(ip1));
6004 temps.Exclude(ip0, ip1);
6005
6006 Register temp;
6007 if (instruction->GetArray()->IsIntermediateAddress()) {
6008 // We do not need to compute the intermediate address from the array: the
6009 // input instruction has done it already. See the comment in
6010 // `TryExtractArrayAccessAddress()`.
6011 if (kIsDebugBuild) {
6012 HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
6013 DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
6014 }
6015 temp = obj;
6016 } else {
6017 temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0));
6018 __ Add(temp.X(), obj.X(), Operand(data_offset));
6019 }
6020
6021 uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode());
6022
6023 {
6024 ExactAssemblyScope guard(GetVIXLAssembler(),
6025 (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
6026 vixl::aarch64::Label return_address;
6027 __ adr(lr, &return_address);
6028 EmitBakerReadBarrierCbnz(custom_data);
6029 static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
6030 "Array LDR must be 1 instruction (4B) before the return address label; "
6031 " 2 instructions (8B) for heap poisoning.");
6032 __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
6033 DCHECK(!needs_null_check); // The thunk cannot handle the null check.
6034 // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
6035 // macro instructions disallowed in ExactAssemblyScope.
6036 if (kPoisonHeapReferences) {
6037 __ neg(ref_reg, Operand(ref_reg));
6038 }
6039 __ bind(&return_address);
6040 }
6041 MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
6042 }
6043
MaybeGenerateMarkingRegisterCheck(int code,Location temp_loc)6044 void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
6045 // The following condition is a compile-time one, so it does not have a run-time cost.
6046 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
6047 // The following condition is a run-time one; it is executed after the
6048 // previous compile-time test, to avoid penalizing non-debug builds.
6049 if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
6050 UseScratchRegisterScope temps(GetVIXLAssembler());
6051 Register temp = temp_loc.IsValid() ? WRegisterFrom(temp_loc) : temps.AcquireW();
6052 GetAssembler()->GenerateMarkingRegisterCheck(temp, code);
6053 }
6054 }
6055 }
6056
GenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)6057 void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
6058 Location out,
6059 Location ref,
6060 Location obj,
6061 uint32_t offset,
6062 Location index) {
6063 DCHECK(kEmitCompilerReadBarrier);
6064
6065 // Insert a slow path based read barrier *after* the reference load.
6066 //
6067 // If heap poisoning is enabled, the unpoisoning of the loaded
6068 // reference will be carried out by the runtime within the slow
6069 // path.
6070 //
6071 // Note that `ref` currently does not get unpoisoned (when heap
6072 // poisoning is enabled), which is alright as the `ref` argument is
6073 // not used by the artReadBarrierSlow entry point.
6074 //
6075 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6076 SlowPathCodeARM64* slow_path = new (GetScopedAllocator())
6077 ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
6078 AddSlowPath(slow_path);
6079
6080 __ B(slow_path->GetEntryLabel());
6081 __ Bind(slow_path->GetExitLabel());
6082 }
6083
MaybeGenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)6084 void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6085 Location out,
6086 Location ref,
6087 Location obj,
6088 uint32_t offset,
6089 Location index) {
6090 if (kEmitCompilerReadBarrier) {
6091 // Baker's read barriers shall be handled by the fast path
6092 // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
6093 DCHECK(!kUseBakerReadBarrier);
6094 // If heap poisoning is enabled, unpoisoning will be taken care of
6095 // by the runtime within the slow path.
6096 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6097 } else if (kPoisonHeapReferences) {
6098 GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
6099 }
6100 }
6101
GenerateReadBarrierForRootSlow(HInstruction * instruction,Location out,Location root)6102 void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6103 Location out,
6104 Location root) {
6105 DCHECK(kEmitCompilerReadBarrier);
6106
6107 // Insert a slow path based read barrier *after* the GC root load.
6108 //
6109 // Note that GC roots are not affected by heap poisoning, so we do
6110 // not need to do anything special for this here.
6111 SlowPathCodeARM64* slow_path =
6112 new (GetScopedAllocator()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
6113 AddSlowPath(slow_path);
6114
6115 __ B(slow_path->GetEntryLabel());
6116 __ Bind(slow_path->GetExitLabel());
6117 }
6118
VisitClassTableGet(HClassTableGet * instruction)6119 void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
6120 LocationSummary* locations =
6121 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6122 locations->SetInAt(0, Location::RequiresRegister());
6123 locations->SetOut(Location::RequiresRegister());
6124 }
6125
VisitClassTableGet(HClassTableGet * instruction)6126 void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
6127 LocationSummary* locations = instruction->GetLocations();
6128 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
6129 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
6130 instruction->GetIndex(), kArm64PointerSize).SizeValue();
6131 __ Ldr(XRegisterFrom(locations->Out()),
6132 MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
6133 } else {
6134 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
6135 instruction->GetIndex(), kArm64PointerSize));
6136 __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
6137 mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
6138 __ Ldr(XRegisterFrom(locations->Out()),
6139 MemOperand(XRegisterFrom(locations->Out()), method_offset));
6140 }
6141 }
6142
PatchJitRootUse(uint8_t * code,const uint8_t * roots_data,vixl::aarch64::Literal<uint32_t> * literal,uint64_t index_in_table)6143 static void PatchJitRootUse(uint8_t* code,
6144 const uint8_t* roots_data,
6145 vixl::aarch64::Literal<uint32_t>* literal,
6146 uint64_t index_in_table) {
6147 uint32_t literal_offset = literal->GetOffset();
6148 uintptr_t address =
6149 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
6150 uint8_t* data = code + literal_offset;
6151 reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
6152 }
6153
EmitJitRootPatches(uint8_t * code,const uint8_t * roots_data)6154 void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
6155 for (const auto& entry : jit_string_patches_) {
6156 const StringReference& string_reference = entry.first;
6157 vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
6158 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
6159 PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
6160 }
6161 for (const auto& entry : jit_class_patches_) {
6162 const TypeReference& type_reference = entry.first;
6163 vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
6164 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
6165 PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
6166 }
6167 }
6168
6169 #undef __
6170 #undef QUICK_ENTRY_POINT
6171
6172 #define __ assembler.GetVIXLAssembler()->
6173
EmitGrayCheckAndFastPath(arm64::Arm64Assembler & assembler,vixl::aarch64::Register base_reg,vixl::aarch64::MemOperand & lock_word,vixl::aarch64::Label * slow_path,vixl::aarch64::Label * throw_npe=nullptr)6174 static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
6175 vixl::aarch64::Register base_reg,
6176 vixl::aarch64::MemOperand& lock_word,
6177 vixl::aarch64::Label* slow_path,
6178 vixl::aarch64::Label* throw_npe = nullptr) {
6179 // Load the lock word containing the rb_state.
6180 __ Ldr(ip0.W(), lock_word);
6181 // Given the numeric representation, it's enough to check the low bit of the rb_state.
6182 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
6183 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
6184 __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path);
6185 static_assert(
6186 BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
6187 "Field and array LDR offsets must be the same to reuse the same code.");
6188 // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
6189 if (throw_npe != nullptr) {
6190 __ Bind(throw_npe);
6191 }
6192 // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
6193 static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
6194 "Field LDR must be 1 instruction (4B) before the return address label; "
6195 " 2 instructions (8B) for heap poisoning.");
6196 __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
6197 // Introduce a dependency on the lock_word including rb_state,
6198 // to prevent load-load reordering, and without using
6199 // a memory barrier (which would be more expensive).
6200 __ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
6201 __ Br(lr); // And return back to the function.
6202 // Note: The fake dependency is unnecessary for the slow path.
6203 }
6204
6205 // Load the read barrier introspection entrypoint in register `entrypoint`.
LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler & assembler,vixl::aarch64::Register entrypoint)6206 static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler,
6207 vixl::aarch64::Register entrypoint) {
6208 // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
6209 DCHECK_EQ(ip0.GetCode(), 16u);
6210 const int32_t entry_point_offset =
6211 Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
6212 __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
6213 }
6214
CompileBakerReadBarrierThunk(Arm64Assembler & assembler,uint32_t encoded_data,std::string * debug_name)6215 void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
6216 uint32_t encoded_data,
6217 /*out*/ std::string* debug_name) {
6218 BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
6219 switch (kind) {
6220 case BakerReadBarrierKind::kField:
6221 case BakerReadBarrierKind::kAcquire: {
6222 auto base_reg =
6223 Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
6224 CheckValidReg(base_reg.GetCode());
6225 auto holder_reg =
6226 Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data));
6227 CheckValidReg(holder_reg.GetCode());
6228 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
6229 temps.Exclude(ip0, ip1);
6230 // If base_reg differs from holder_reg, the offset was too large and we must have emitted
6231 // an explicit null check before the load. Otherwise, for implicit null checks, we need to
6232 // null-check the holder as we do not necessarily do that check before going to the thunk.
6233 vixl::aarch64::Label throw_npe_label;
6234 vixl::aarch64::Label* throw_npe = nullptr;
6235 if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) {
6236 throw_npe = &throw_npe_label;
6237 __ Cbz(holder_reg.W(), throw_npe);
6238 }
6239 // Check if the holder is gray and, if not, add fake dependency to the base register
6240 // and return to the LDR instruction to load the reference. Otherwise, use introspection
6241 // to load the reference and call the entrypoint that performs further checks on the
6242 // reference and marks it if needed.
6243 vixl::aarch64::Label slow_path;
6244 MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
6245 EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, throw_npe);
6246 __ Bind(&slow_path);
6247 if (kind == BakerReadBarrierKind::kField) {
6248 MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
6249 __ Ldr(ip0.W(), ldr_address); // Load the LDR (immediate) unsigned offset.
6250 LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
6251 __ Ubfx(ip0.W(), ip0.W(), 10, 12); // Extract the offset.
6252 __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2)); // Load the reference.
6253 } else {
6254 DCHECK(kind == BakerReadBarrierKind::kAcquire);
6255 DCHECK(!base_reg.Is(holder_reg));
6256 LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
6257 __ Ldar(ip0.W(), MemOperand(base_reg));
6258 }
6259 // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
6260 __ Br(ip1); // Jump to the entrypoint.
6261 break;
6262 }
6263 case BakerReadBarrierKind::kArray: {
6264 auto base_reg =
6265 Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
6266 CheckValidReg(base_reg.GetCode());
6267 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
6268 BakerReadBarrierSecondRegField::Decode(encoded_data));
6269 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
6270 temps.Exclude(ip0, ip1);
6271 vixl::aarch64::Label slow_path;
6272 int32_t data_offset =
6273 mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
6274 MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
6275 DCHECK_LT(lock_word.GetOffset(), 0);
6276 EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
6277 __ Bind(&slow_path);
6278 MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
6279 __ Ldr(ip0.W(), ldr_address); // Load the LDR (register) unsigned offset.
6280 LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
6281 __ Ubfx(ip0, ip0, 16, 6); // Extract the index register, plus 32 (bit 21 is set).
6282 __ Bfi(ip1, ip0, 3, 6); // Insert ip0 to the entrypoint address to create
6283 // a switch case target based on the index register.
6284 __ Mov(ip0, base_reg); // Move the base register to ip0.
6285 __ Br(ip1); // Jump to the entrypoint's array switch case.
6286 break;
6287 }
6288 case BakerReadBarrierKind::kGcRoot: {
6289 // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
6290 // and it does not have a forwarding address), call the correct introspection entrypoint;
6291 // otherwise return the reference (or the extracted forwarding address).
6292 // There is no gray bit check for GC roots.
6293 auto root_reg =
6294 Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
6295 CheckValidReg(root_reg.GetCode());
6296 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
6297 BakerReadBarrierSecondRegField::Decode(encoded_data));
6298 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
6299 temps.Exclude(ip0, ip1);
6300 vixl::aarch64::Label return_label, not_marked, forwarding_address;
6301 __ Cbz(root_reg, &return_label);
6302 MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value());
6303 __ Ldr(ip0.W(), lock_word);
6304 __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, ¬_marked);
6305 __ Bind(&return_label);
6306 __ Br(lr);
6307 __ Bind(¬_marked);
6308 __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1));
6309 __ B(&forwarding_address, mi);
6310 LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
6311 // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to
6312 // art_quick_read_barrier_mark_introspection_gc_roots.
6313 __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
6314 __ Mov(ip0.W(), root_reg);
6315 __ Br(ip1);
6316 __ Bind(&forwarding_address);
6317 __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift);
6318 __ Br(lr);
6319 break;
6320 }
6321 default:
6322 LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
6323 UNREACHABLE();
6324 }
6325
6326 // For JIT, the slow path is considered part of the compiled method,
6327 // so JIT should pass null as `debug_name`. Tests may not have a runtime.
6328 DCHECK(Runtime::Current() == nullptr ||
6329 !Runtime::Current()->UseJitCompilation() ||
6330 debug_name == nullptr);
6331 if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
6332 std::ostringstream oss;
6333 oss << "BakerReadBarrierThunk";
6334 switch (kind) {
6335 case BakerReadBarrierKind::kField:
6336 oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
6337 << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
6338 break;
6339 case BakerReadBarrierKind::kAcquire:
6340 oss << "Acquire_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
6341 << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
6342 break;
6343 case BakerReadBarrierKind::kArray:
6344 oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
6345 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
6346 BakerReadBarrierSecondRegField::Decode(encoded_data));
6347 break;
6348 case BakerReadBarrierKind::kGcRoot:
6349 oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
6350 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
6351 BakerReadBarrierSecondRegField::Decode(encoded_data));
6352 break;
6353 }
6354 *debug_name = oss.str();
6355 }
6356 }
6357
6358 #undef __
6359
6360 } // namespace arm64
6361 } // namespace art
6362