/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_

#include "base/bit_field.h"
#include "code_generator.h"
#include "common_arm64.h"
#include "dex/dex_file_types.h"
#include "dex/string_reference.h"
#include "dex/type_reference.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/arm64/assembler_arm64.h"

// TODO(VIXL): Make VIXL compile with -Wshadow.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#include "aarch64/disasm-aarch64.h"
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop

namespace art {

namespace linker {
class Arm64RelativePatcherTest;
}  // namespace linker

namespace arm64 {

class CodeGeneratorARM64;

// Use a local definition to prevent copying mistakes.
static constexpr size_t kArm64WordSize = static_cast<size_t>(kArm64PointerSize);

// These constants are used as an approximate margin when emission of veneer and literal pools
// must be blocked.
static constexpr int kMaxMacroInstructionSizeInBytes = 15 * vixl::aarch64::kInstructionSize;
static constexpr int kInvokeCodeMarginSizeInBytes = 6 * kMaxMacroInstructionSizeInBytes;
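// Hedged sketch of how such a margin is typically consumed (the authoritative
// call sites are in code_generator_arm64.cc): the value is handed to a VIXL
// emission scope so that no veneer or literal pool is emitted in the middle of
// a sequence that must stay contiguous, e.g.
//   vixl::EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
// where `guard` and the surrounding code are illustrative, not declarations
// from this header.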

static const vixl::aarch64::Register kParameterCoreRegisters[] = {
  vixl::aarch64::x1,
  vixl::aarch64::x2,
  vixl::aarch64::x3,
  vixl::aarch64::x4,
  vixl::aarch64::x5,
  vixl::aarch64::x6,
  vixl::aarch64::x7
};
static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static const vixl::aarch64::FPRegister kParameterFPRegisters[] = {
  vixl::aarch64::d0,
  vixl::aarch64::d1,
  vixl::aarch64::d2,
  vixl::aarch64::d3,
  vixl::aarch64::d4,
  vixl::aarch64::d5,
  vixl::aarch64::d6,
  vixl::aarch64::d7
};
static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters);

// Thread Register.
const vixl::aarch64::Register tr = vixl::aarch64::x19;
// Marking Register.
const vixl::aarch64::Register mr = vixl::aarch64::x20;
// Method register on invoke.
static const vixl::aarch64::Register kArtMethodRegister = vixl::aarch64::x0;
const vixl::aarch64::CPURegList vixl_reserved_core_registers(vixl::aarch64::ip0,
                                                             vixl::aarch64::ip1);
const vixl::aarch64::CPURegList vixl_reserved_fp_registers(vixl::aarch64::d31);

const vixl::aarch64::CPURegList runtime_reserved_core_registers =
    vixl::aarch64::CPURegList(
        tr,
        // Reserve X20 as Marking Register when emitting Baker read barriers.
        ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) ? mr : vixl::aarch64::NoCPUReg),
        vixl::aarch64::lr);

// Some instructions have special requirements for a temporary, for example
// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
// a temp that is not R0 (to avoid an extra move) and Baker read barrier field
// loads with large offsets need a fixed register to limit the number of link-time
// thunks we generate. For these and similar cases, we want to reserve a specific
// register that's neither callee-save nor an argument register. We choose x15.
inline Location FixedTempLocation() {
  return Location::RegisterLocation(vixl::aarch64::x15.GetCode());
}
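
// Illustrative use of the fixed temp above (hedged sketch; `locations` is a
// hypothetical LocationSummary* and AddTemp() is assumed from locations.h):
//   locations->AddTemp(FixedTempLocation());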

// Callee-save registers AAPCS64, without x19 (Thread Register) (nor
// x20 (Marking Register) when emitting Baker read barriers).
const vixl::aarch64::CPURegList callee_saved_core_registers(
    vixl::aarch64::CPURegister::kRegister,
    vixl::aarch64::kXRegSize,
    ((kEmitCompilerReadBarrier && kUseBakerReadBarrier)
         ? vixl::aarch64::x21.GetCode()
         : vixl::aarch64::x20.GetCode()),
    vixl::aarch64::x30.GetCode());
const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kFPRegister,
                                                          vixl::aarch64::kDRegSize,
                                                          vixl::aarch64::d8.GetCode(),
                                                          vixl::aarch64::d15.GetCode());
Location ARM64ReturnLocation(DataType::Type return_type);
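
// Hedged note: the definition of ARM64ReturnLocation() lives in the .cc file.
// Per AAPCS64 it is expected to map integral and reference return types to
// x0/w0, floating-point return types to d0/s0, and void to no location; the
// exact Location kinds returned are an assumption here, so consult the
// implementation for the authoritative mapping.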

class SlowPathCodeARM64 : public SlowPathCode {
 public:
  explicit SlowPathCodeARM64(HInstruction* instruction)
      : SlowPathCode(instruction), entry_label_(), exit_label_() {}

  vixl::aarch64::Label* GetEntryLabel() { return &entry_label_; }
  vixl::aarch64::Label* GetExitLabel() { return &exit_label_; }

  void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) override;
  void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) override;

 private:
  vixl::aarch64::Label entry_label_;
  vixl::aarch64::Label exit_label_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM64);
};

class JumpTableARM64 : public DeletableArenaObject<kArenaAllocSwitchTable> {
 public:
  explicit JumpTableARM64(HPackedSwitch* switch_instr)
      : switch_instr_(switch_instr), table_start_() {}

  vixl::aarch64::Label* GetTableStartLabel() { return &table_start_; }

  void EmitTable(CodeGeneratorARM64* codegen);

 private:
  HPackedSwitch* const switch_instr_;
  vixl::aarch64::Label table_start_;

  DISALLOW_COPY_AND_ASSIGN(JumpTableARM64);
};

static const vixl::aarch64::Register kRuntimeParameterCoreRegisters[] =
    { vixl::aarch64::x0,
      vixl::aarch64::x1,
      vixl::aarch64::x2,
      vixl::aarch64::x3,
      vixl::aarch64::x4,
      vixl::aarch64::x5,
      vixl::aarch64::x6,
      vixl::aarch64::x7 };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
    arraysize(kRuntimeParameterCoreRegisters);
static const vixl::aarch64::FPRegister kRuntimeParameterFpuRegisters[] =
    { vixl::aarch64::d0,
      vixl::aarch64::d1,
      vixl::aarch64::d2,
      vixl::aarch64::d3,
      vixl::aarch64::d4,
      vixl::aarch64::d5,
      vixl::aarch64::d6,
      vixl::aarch64::d7 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
    arraysize(kRuntimeParameterFpuRegisters);

class InvokeRuntimeCallingConvention : public CallingConvention<vixl::aarch64::Register,
                                                                vixl::aarch64::FPRegister> {
 public:
  static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);

  InvokeRuntimeCallingConvention()
      : CallingConvention(kRuntimeParameterCoreRegisters,
                          kRuntimeParameterCoreRegistersLength,
                          kRuntimeParameterFpuRegisters,
                          kRuntimeParameterFpuRegistersLength,
                          kArm64PointerSize) {}

  Location GetReturnLocation(DataType::Type return_type);

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
};

class InvokeDexCallingConvention : public CallingConvention<vixl::aarch64::Register,
                                                            vixl::aarch64::FPRegister> {
 public:
  InvokeDexCallingConvention()
      : CallingConvention(kParameterCoreRegisters,
                          kParameterCoreRegistersLength,
                          kParameterFPRegisters,
                          kParameterFPRegistersLength,
                          kArm64PointerSize) {}

  Location GetReturnLocation(DataType::Type return_type) const {
    return ARM64ReturnLocation(return_type);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};

class InvokeDexCallingConventionVisitorARM64 : public InvokeDexCallingConventionVisitor {
 public:
  InvokeDexCallingConventionVisitorARM64() {}
  virtual ~InvokeDexCallingConventionVisitorARM64() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type return_type) const override {
    return calling_convention.GetReturnLocation(return_type);
  }
  Location GetMethodLocation() const override;

 private:
  InvokeDexCallingConvention calling_convention;

  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM64);
};

class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention {
 public:
  FieldAccessCallingConventionARM64() {}

  Location GetObjectLocation() const override {
    return helpers::LocationFrom(vixl::aarch64::x1);
  }
  Location GetFieldIndexLocation() const override {
    return helpers::LocationFrom(vixl::aarch64::x0);
  }
  Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
    return helpers::LocationFrom(vixl::aarch64::x0);
  }
  Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED,
                               bool is_instance) const override {
    return is_instance
        ? helpers::LocationFrom(vixl::aarch64::x2)
        : helpers::LocationFrom(vixl::aarch64::x1);
  }
  Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
    return helpers::LocationFrom(vixl::aarch64::d0);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionARM64);
};

class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
 public:
  InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen);

#define DECLARE_VISIT_INSTRUCTION(name, super) \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

  Arm64Assembler* GetAssembler() const { return assembler_; }
  vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }

 private:
  void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
                                        vixl::aarch64::Register class_reg);
  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
                                         vixl::aarch64::Register temp);
  void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
  void HandleBinaryOp(HBinaryOperation* instr);

  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      bool value_can_be_null);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
  void HandleCondition(HCondition* instruction);

  // Generate a heap reference load using one register `out`:
  //
  //   out <- *(out + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a read barrier and
  // shall be a register in that case; it may be an invalid location
  // otherwise.
  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                        Location out,
                                        uint32_t offset,
                                        Location maybe_temp,
                                        ReadBarrierOption read_barrier_option);
  // Generate a heap reference load using two different registers
  // `out` and `obj`:
  //
  //   out <- *(obj + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a Baker's (fast
  // path) read barrier and shall be a register in that case; it may
  // be an invalid location otherwise.
  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                         Location out,
                                         Location obj,
                                         uint32_t offset,
                                         Location maybe_temp,
                                         ReadBarrierOption read_barrier_option);
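
  // Hedged illustration of the load described above for the case without read
  // barriers (helpers::HeapOperand() comes from common_arm64.h and
  // MaybeUnpoisonHeapReference() is assumed from the ART arm64 assembler;
  // `out_reg` and `obj_reg` are illustrative names):
  //   GetVIXLAssembler()->Ldr(out_reg, HeapOperand(obj_reg, offset));
  //   GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
  // The Baker fast path and the slow-path read barrier are layered on top of
  // this basic form when read barriers are enabled.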

  // Generate a floating-point comparison.
  void GenerateFcmp(HInstruction* instruction);

  void HandleShift(HBinaryOperation* instr);
  void GenerateTestAndBranch(HInstruction* instruction,
                             size_t condition_input_index,
                             vixl::aarch64::Label* true_target,
                             vixl::aarch64::Label* false_target);
  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
  void DivRemByPowerOfTwo(HBinaryOperation* instruction);
  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
  void GenerateIntDiv(HDiv* instruction);
  void GenerateIntDivForConstDenom(HDiv* instruction);
  void GenerateIntDivForPower2Denom(HDiv* instruction);
  void GenerateIntRem(HRem* instruction);
  void GenerateIntRemForConstDenom(HRem* instruction);
  void GenerateIntRemForPower2Denom(HRem* instruction);
  void HandleGoto(HInstruction* got, HBasicBlock* successor);

  vixl::aarch64::MemOperand VecAddress(
      HVecMemoryOperation* instruction,
      // This function may acquire a scratch register.
      vixl::aarch64::UseScratchRegisterScope* temps_scope,
      size_t size,
      bool is_string_char_at,
      /*out*/ vixl::aarch64::Register* scratch);

  Arm64Assembler* const assembler_;
  CodeGeneratorARM64* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorARM64);
};

class LocationsBuilderARM64 : public HGraphVisitor {
 public:
  LocationsBuilderARM64(HGraph* graph, CodeGeneratorARM64* codegen)
      : HGraphVisitor(graph), codegen_(codegen) {}

#define DECLARE_VISIT_INSTRUCTION(name, super) \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

 private:
  void HandleBinaryOp(HBinaryOperation* instr);
  void HandleFieldSet(HInstruction* instruction);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
  void HandleInvoke(HInvoke* instr);
  void HandleCondition(HCondition* instruction);
  void HandleShift(HBinaryOperation* instr);

  CodeGeneratorARM64* const codegen_;
  InvokeDexCallingConventionVisitorARM64 parameter_visitor_;

  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64);
};

class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap {
 public:
  ParallelMoveResolverARM64(ArenaAllocator* allocator, CodeGeneratorARM64* codegen)
      : ParallelMoveResolverNoSwap(allocator), codegen_(codegen), vixl_temps_() {}

 protected:
  void PrepareForEmitNativeCode() override;
  void FinishEmitNativeCode() override;
  Location AllocateScratchLocationFor(Location::Kind kind) override;
  void FreeScratchLocation(Location loc) override;
  void EmitMove(size_t index) override;

 private:
  Arm64Assembler* GetAssembler() const;
  vixl::aarch64::MacroAssembler* GetVIXLAssembler() const {
    return GetAssembler()->GetVIXLAssembler();
  }

  CodeGeneratorARM64* const codegen_;
  vixl::aarch64::UseScratchRegisterScope vixl_temps_;

  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM64);
};

class CodeGeneratorARM64 : public CodeGenerator {
 public:
  CodeGeneratorARM64(HGraph* graph,
                     const CompilerOptions& compiler_options,
                     OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGeneratorARM64() {}

  void GenerateFrameEntry() override;
  void GenerateFrameExit() override;

  vixl::aarch64::CPURegList GetFramePreservedCoreRegisters() const;
  vixl::aarch64::CPURegList GetFramePreservedFPRegisters() const;

  void Bind(HBasicBlock* block) override;

  vixl::aarch64::Label* GetLabelOf(HBasicBlock* block) {
    block = FirstNonEmptyBlock(block);
    return &(block_labels_[block->GetBlockId()]);
  }

  size_t GetWordSize() const override {
    return kArm64WordSize;
  }

  size_t GetFloatingPointSpillSlotSize() const override {
    return GetGraph()->HasSIMD()
        ? 2 * kArm64WordSize   // 16 bytes == 2 arm64 words for each spill
        : 1 * kArm64WordSize;  //  8 bytes == 1 arm64 word for each spill
  }

  uintptr_t GetAddressOf(HBasicBlock* block) override {
    vixl::aarch64::Label* block_entry_label = GetLabelOf(block);
    DCHECK(block_entry_label->IsBound());
    return block_entry_label->GetLocation();
  }

  HGraphVisitor* GetLocationBuilder() override { return &location_builder_; }
  HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; }
  Arm64Assembler* GetAssembler() override { return &assembler_; }
  const Arm64Assembler& GetAssembler() const override { return assembler_; }
  vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }

  // Emit a write barrier.
  void MarkGCCard(vixl::aarch64::Register object,
                  vixl::aarch64::Register value,
                  bool value_can_be_null);

  void GenerateMemoryBarrier(MemBarrierKind kind);

  // Register allocation.

  void SetupBlockedRegisters() const override;

  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;

  // The number of registers that can be allocated. The register allocator may
  // decide to reserve and not use a few of them.
  // We do not consider registers sp, xzr, wzr. They are either not allocatable
  // (xzr, wzr), or make for poor allocatable registers (sp alignment
  // requirements, etc.). This also facilitates our task as all other registers
  // can easily be mapped to or from their type and index or code.
  static const int kNumberOfAllocatableRegisters = vixl::aarch64::kNumberOfRegisters - 1;
  static const int kNumberOfAllocatableFPRegisters = vixl::aarch64::kNumberOfFPRegisters;
  static constexpr int kNumberOfAllocatableRegisterPairs = 0;

  void DumpCoreRegister(std::ostream& stream, int reg) const override;
  void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;

  InstructionSet GetInstructionSet() const override {
    return InstructionSet::kArm64;
  }

  const Arm64InstructionSetFeatures& GetInstructionSetFeatures() const;

  void Initialize() override {
    block_labels_.resize(GetGraph()->GetBlocks().size());
  }

  // We want to use the STP and LDP instructions to spill and restore registers for slow paths.
  // These instructions can only encode offsets that are multiples of the register size accessed.
  uint32_t GetPreferredSlotsAlignment() const override { return vixl::aarch64::kXRegSizeInBytes; }

  JumpTableARM64* CreateJumpTable(HPackedSwitch* switch_instr) {
    jump_tables_.emplace_back(new (GetGraph()->GetAllocator()) JumpTableARM64(switch_instr));
    return jump_tables_.back().get();
  }

  void Finalize(CodeAllocator* allocator) override;

  // Code generation helpers.
  void MoveConstant(vixl::aarch64::CPURegister destination, HConstant* constant);
  void MoveConstant(Location destination, int32_t value) override;
  void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
  void AddLocationAsTemp(Location location, LocationSummary* locations) override;

  void Load(DataType::Type type,
            vixl::aarch64::CPURegister dst,
            const vixl::aarch64::MemOperand& src);
  void Store(DataType::Type type,
             vixl::aarch64::CPURegister src,
             const vixl::aarch64::MemOperand& dst);
  void LoadAcquire(HInstruction* instruction,
                   vixl::aarch64::CPURegister dst,
                   const vixl::aarch64::MemOperand& src,
                   bool needs_null_check);
  void StoreRelease(HInstruction* instruction,
                    DataType::Type type,
                    vixl::aarch64::CPURegister src,
                    const vixl::aarch64::MemOperand& dst,
                    bool needs_null_check);
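
  // Hedged note: LoadAcquire() and StoreRelease() are expected to lower to the
  // AArch64 acquire/release instructions (the LDAR and STLR families), so no
  // separate memory barrier is required around them; the precise lowering and
  // null-check recording live in the .cc file.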

  // Generate code to invoke a runtime entry point.
  void InvokeRuntime(QuickEntrypointEnum entrypoint,
                     HInstruction* instruction,
                     uint32_t dex_pc,
                     SlowPathCode* slow_path = nullptr) override;

  // Generate code to invoke a runtime entry point, but do not record
  // PC-related information in a stack map.
  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                           HInstruction* instruction,
                                           SlowPathCode* slow_path);

  ParallelMoveResolverARM64* GetMoveResolver() override { return &move_resolver_; }

  bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override {
    return false;
  }

  // Check if the desired_string_load_kind is supported. If it is, return it;
  // otherwise return a fall-back kind that should be used instead.
  HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) override;

  // Check if the desired_class_load_kind is supported. If it is, return it;
  // otherwise return a fall-back kind that should be used instead.
  HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) override;

  // Check if the desired_dispatch_info is supported. If it is, return it;
  // otherwise return a fall-back info that should be used instead.
  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method) override;

  void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
  void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;

  void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
                              DataType::Type type ATTRIBUTE_UNUSED) override {
    UNIMPLEMENTED(FATAL);
  }

  // Add a new boot image intrinsic patch for an instruction and return the label
  // to be bound before the instruction. The instruction will be either the
  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
  // to the associated ADRP patch label).
  vixl::aarch64::Label* NewBootImageIntrinsicPatch(uint32_t intrinsic_data,
                                                   vixl::aarch64::Label* adrp_label = nullptr);

  // Add a new boot image relocation patch for an instruction and return the label
  // to be bound before the instruction. The instruction will be either the
  // ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` pointing
  // to the associated ADRP patch label).
  vixl::aarch64::Label* NewBootImageRelRoPatch(uint32_t boot_image_offset,
                                               vixl::aarch64::Label* adrp_label = nullptr);

  // Add a new boot image method patch for an instruction and return the label
  // to be bound before the instruction. The instruction will be either the
  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
  // to the associated ADRP patch label).
  vixl::aarch64::Label* NewBootImageMethodPatch(MethodReference target_method,
                                                vixl::aarch64::Label* adrp_label = nullptr);

  // Add a new .bss entry method patch for an instruction and return
  // the label to be bound before the instruction. The instruction will be
  // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label`
  // pointing to the associated ADRP patch label).
  vixl::aarch64::Label* NewMethodBssEntryPatch(MethodReference target_method,
                                               vixl::aarch64::Label* adrp_label = nullptr);

  // Add a new boot image type patch for an instruction and return the label
  // to be bound before the instruction. The instruction will be either the
  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
  // to the associated ADRP patch label).
  vixl::aarch64::Label* NewBootImageTypePatch(const DexFile& dex_file,
                                              dex::TypeIndex type_index,
                                              vixl::aarch64::Label* adrp_label = nullptr);

  // Add a new .bss entry type patch for an instruction and return the label
  // to be bound before the instruction. The instruction will be either the
  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
  // to the associated ADRP patch label).
  vixl::aarch64::Label* NewBssEntryTypePatch(const DexFile& dex_file,
                                             dex::TypeIndex type_index,
                                             vixl::aarch64::Label* adrp_label = nullptr);

  // Add a new boot image string patch for an instruction and return the label
  // to be bound before the instruction. The instruction will be either the
  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
  // to the associated ADRP patch label).
  vixl::aarch64::Label* NewBootImageStringPatch(const DexFile& dex_file,
                                                dex::StringIndex string_index,
                                                vixl::aarch64::Label* adrp_label = nullptr);

  // Add a new .bss entry string patch for an instruction and return the label
  // to be bound before the instruction. The instruction will be either the
  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
  // to the associated ADRP patch label).
  vixl::aarch64::Label* NewStringBssEntryPatch(const DexFile& dex_file,
                                               dex::StringIndex string_index,
                                               vixl::aarch64::Label* adrp_label = nullptr);

  // Emit the CBNZ instruction for Baker read barriers and record
  // the associated patch (for AOT) or slow path (for JIT).
  void EmitBakerReadBarrierCbnz(uint32_t custom_data);

  vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
  vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file,
                                                                dex::StringIndex string_index,
                                                                Handle<mirror::String> handle);
  vixl::aarch64::Literal<uint32_t>* DeduplicateJitClassLiteral(const DexFile& dex_file,
                                                               dex::TypeIndex type_index,
                                                               Handle<mirror::Class> handle);

  void EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, vixl::aarch64::Register reg);
  void EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
                          vixl::aarch64::Register out,
                          vixl::aarch64::Register base);
  void EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
                                vixl::aarch64::Register out,
                                vixl::aarch64::Register base);
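
  // Hedged usage sketch tying the New*Patch() helpers above to these placeholder
  // emitters (it mirrors the two-instruction pattern described in the comments;
  // `temp`, `out`, `dex_file` and `string_index` are illustrative names and the
  // authoritative call sites are in code_generator_arm64.cc):
  //   vixl::aarch64::Label* adrp_label = NewBootImageStringPatch(dex_file, string_index);
  //   EmitAdrpPlaceholder(adrp_label, temp);     // ADRP temp, <string page>
  //   vixl::aarch64::Label* add_label =
  //       NewBootImageStringPatch(dex_file, string_index, adrp_label);
  //   EmitAddPlaceholder(add_label, out, temp);  // ADD out, temp, <page offset>
  // For .bss and .data.bimg.rel.ro entries, EmitLdrOffsetPlaceholder() takes the
  // place of the ADD, as noted in the per-method comments above.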

  void LoadBootImageAddress(vixl::aarch64::Register reg, uint32_t boot_image_reference);
  void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset);

  void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;
  bool NeedsThunkCode(const linker::LinkerPatch& patch) const override;
  void EmitThunkCode(const linker::LinkerPatch& patch,
                     /*out*/ ArenaVector<uint8_t>* code,
                     /*out*/ std::string* debug_name) override;

  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;

  // Generate a GC root reference load:
  //
  //   root <- *(obj + offset)
  //
  // while honoring read barriers based on read_barrier_option.
  void GenerateGcRootFieldLoad(HInstruction* instruction,
                               Location root,
                               vixl::aarch64::Register obj,
                               uint32_t offset,
                               vixl::aarch64::Label* fixup_label,
                               ReadBarrierOption read_barrier_option);
  // Generate MOV for the `old_value` in UnsafeCASObject and mark it with Baker read barrier.
  void GenerateUnsafeCasOldValueMovWithBakerReadBarrier(vixl::aarch64::Register marked,
                                                        vixl::aarch64::Register old_value);
  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference field load when Baker's read barriers are used.
  // Overload suitable for Unsafe.getObject/-Volatile() intrinsic.
  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             vixl::aarch64::Register obj,
                                             const vixl::aarch64::MemOperand& src,
                                             bool needs_null_check,
                                             bool use_load_acquire);
  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference field load when Baker's read barriers are used.
  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             vixl::aarch64::Register obj,
                                             uint32_t offset,
                                             Location maybe_temp,
                                             bool needs_null_check,
                                             bool use_load_acquire);
  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference array load when Baker's read barriers are used.
  void GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction,
                                             Location ref,
                                             vixl::aarch64::Register obj,
                                             uint32_t data_offset,
                                             Location index,
                                             bool needs_null_check);

  // Emit code checking the status of the Marking Register, and
  // aborting the program if MR does not match the value stored in the
  // art::Thread object. Code is only emitted in debug mode and if
  // CompilerOptions::EmitRunTimeChecksInDebugMode returns true.
  //
  // Argument `code` is used to identify the different occurrences of
  // MaybeGenerateMarkingRegisterCheck in the code generator, and is
  // passed to the BRK instruction.
  //
  // If `temp_loc` is a valid location, it is expected to be a
  // register and will be used as a temporary to generate code;
  // otherwise, a temporary will be fetched from the core register
  // scratch pool.
  virtual void MaybeGenerateMarkingRegisterCheck(int code,
                                                 Location temp_loc = Location::NoLocation());

  // Generate a read barrier for a heap reference within `instruction`
  // using a slow path.
  //
  // A read barrier for an object reference read from the heap is
  // implemented as a call to the artReadBarrierSlow runtime entry
  // point, which is passed the values in locations `ref`, `obj`, and
  // `offset`:
  //
  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
  //                                      mirror::Object* obj,
  //                                      uint32_t offset);
  //
  // The `out` location contains the value returned by
  // artReadBarrierSlow.
  //
  // When `index` is provided (i.e. for array accesses), the offset
  // value passed to artReadBarrierSlow is adjusted to take `index`
  // into account.
  void GenerateReadBarrierSlow(HInstruction* instruction,
                               Location out,
                               Location ref,
                               Location obj,
                               uint32_t offset,
                               Location index = Location::NoLocation());

  // If read barriers are enabled, generate a read barrier for a heap
  // reference using a slow path. If heap poisoning is enabled, also
  // unpoison the reference in `out`.
  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                    Location out,
                                    Location ref,
                                    Location obj,
                                    uint32_t offset,
                                    Location index = Location::NoLocation());

  // Generate a read barrier for a GC root within `instruction` using
  // a slow path.
  //
  // A read barrier for an object reference GC root is implemented as
  // a call to the artReadBarrierForRootSlow runtime entry point,
  // which is passed the value in location `root`:
  //
  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
  //
  // The `out` location contains the value returned by
  // artReadBarrierForRootSlow.
  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);

  void GenerateNop() override;

  void GenerateImplicitNullCheck(HNullCheck* instruction) override;
  void GenerateExplicitNullCheck(HNullCheck* instruction) override;

 private:
  // Encoding of thunk type and data for link-time generated thunks for Baker read barriers.

  enum class BakerReadBarrierKind : uint8_t {
    kField,    // Field get or array get with constant offset (i.e. constant index).
    kAcquire,  // Volatile field get.
    kArray,    // Array get with index in register.
    kGcRoot,   // GC root load.
    kLast = kGcRoot
  };

  static constexpr uint32_t kBakerReadBarrierInvalidEncodedReg = /* sp/zr is invalid */ 31u;

  static constexpr size_t kBitsForBakerReadBarrierKind =
      MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
  static constexpr size_t kBakerReadBarrierBitsForRegister =
      MinimumBitsToStore(kBakerReadBarrierInvalidEncodedReg);
  using BakerReadBarrierKindField =
      BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>;
  using BakerReadBarrierFirstRegField =
      BitField<uint32_t, kBitsForBakerReadBarrierKind, kBakerReadBarrierBitsForRegister>;
  using BakerReadBarrierSecondRegField =
      BitField<uint32_t,
               kBitsForBakerReadBarrierKind + kBakerReadBarrierBitsForRegister,
               kBakerReadBarrierBitsForRegister>;

  static void CheckValidReg(uint32_t reg) {
    DCHECK(reg < vixl::aarch64::lr.GetCode() &&
           reg != vixl::aarch64::ip0.GetCode() &&
           reg != vixl::aarch64::ip1.GetCode()) << reg;
  }

  static inline uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) {
    CheckValidReg(base_reg);
    CheckValidReg(holder_reg);
    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) |
           BakerReadBarrierFirstRegField::Encode(base_reg) |
           BakerReadBarrierSecondRegField::Encode(holder_reg);
  }

  static inline uint32_t EncodeBakerReadBarrierAcquireData(uint32_t base_reg, uint32_t holder_reg) {
    CheckValidReg(base_reg);
    CheckValidReg(holder_reg);
    DCHECK_NE(base_reg, holder_reg);
    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kAcquire) |
           BakerReadBarrierFirstRegField::Encode(base_reg) |
           BakerReadBarrierSecondRegField::Encode(holder_reg);
  }

  static inline uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
    CheckValidReg(base_reg);
    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
           BakerReadBarrierFirstRegField::Encode(base_reg) |
           BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg);
  }

  static inline uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) {
    CheckValidReg(root_reg);
    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
           BakerReadBarrierFirstRegField::Encode(root_reg) |
           BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg);
  }
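
  // Worked example of the encoding above, derived from the bit-field definitions:
  // kLast == kGcRoot, so the kind needs MinimumBitsToStore(3) == 2 bits, and each
  // register field needs MinimumBitsToStore(31) == 5 bits. Therefore
  //   EncodeBakerReadBarrierFieldData(/* base_reg= */ 1, /* holder_reg= */ 2)
  //     == 0 /* kField */ | (1 << 2) | (2 << 7) == 0x104,
  // i.e. bits [1:0] hold the kind, bits [6:2] the first register and bits [11:7]
  // the second register.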

  void CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
                                    uint32_t encoded_data,
                                    /*out*/ std::string* debug_name);

  using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>;
  using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>;
  using StringToLiteralMap = ArenaSafeMap<StringReference,
                                          vixl::aarch64::Literal<uint32_t>*,
                                          StringReferenceValueComparator>;
  using TypeToLiteralMap = ArenaSafeMap<TypeReference,
                                        vixl::aarch64::Literal<uint32_t>*,
                                        TypeReferenceValueComparator>;

  vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value);
  vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);

  // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types,
  // whether through .data.bimg.rel.ro, .bss, or directly in the boot image.
  struct PcRelativePatchInfo : PatchInfo<vixl::aarch64::Label> {
    PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx)
        : PatchInfo<vixl::aarch64::Label>(dex_file, off_or_idx), pc_insn_label() { }

    vixl::aarch64::Label* pc_insn_label;
  };

  struct BakerReadBarrierPatchInfo {
    explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { }

    vixl::aarch64::Label label;
    uint32_t custom_data;
  };

  vixl::aarch64::Label* NewPcRelativePatch(const DexFile* dex_file,
                                           uint32_t offset_or_index,
                                           vixl::aarch64::Label* adrp_label,
                                           ArenaDeque<PcRelativePatchInfo>* patches);

  void EmitJumpTables();

  template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
  static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos,
                                          ArenaVector<linker::LinkerPatch>* linker_patches);

  // Labels for each block that will be compiled.
  // We use a deque so that the `vixl::aarch64::Label` objects do not move in memory.
  ArenaDeque<vixl::aarch64::Label> block_labels_;  // Indexed by block id.
  vixl::aarch64::Label frame_entry_label_;
  ArenaVector<std::unique_ptr<JumpTableARM64>> jump_tables_;

  LocationsBuilderARM64 location_builder_;
  InstructionCodeGeneratorARM64 instruction_visitor_;
  ParallelMoveResolverARM64 move_resolver_;
  Arm64Assembler assembler_;

  // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
  Uint32ToLiteralMap uint32_literals_;
  // Deduplication map for 64-bit literals, used for non-patchable method address or method code.
  Uint64ToLiteralMap uint64_literals_;
  // PC-relative method patch info for kBootImageLinkTimePcRelative/BootImageRelRo.
  // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
  ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
  // PC-relative method patch info for kBssEntry.
  ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
  // PC-relative type patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_;
  // PC-relative type patch info for kBssEntry.
  ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
  // PC-relative String patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
  // PC-relative String patch info for kBssEntry.
  ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
  // PC-relative patch info for IntrinsicObjects.
  ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_;
  // Baker read barrier patch info.
  ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;

  // Patches for string literals in JIT compiled code.
  StringToLiteralMap jit_string_patches_;
  // Patches for class literals in JIT compiled code.
  TypeToLiteralMap jit_class_patches_;

  // Baker read barrier slow paths, mapping custom data (uint32_t) to label.
  // Wrap the label to work around vixl::aarch64::Label being non-copyable
  // and non-moveable and as such unusable in ArenaSafeMap<>.
  struct LabelWrapper {
    LabelWrapper(const LabelWrapper& src)
        : label() {
      DCHECK(!src.label.IsLinked() && !src.label.IsBound());
    }
    LabelWrapper() = default;
    vixl::aarch64::Label label;
  };
  ArenaSafeMap<uint32_t, LabelWrapper> jit_baker_read_barrier_slow_paths_;

  friend class linker::Arm64RelativePatcherTest;
  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
};

inline Arm64Assembler* ParallelMoveResolverARM64::GetAssembler() const {
  return codegen_->GetAssembler();
}

}  // namespace arm64
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_