/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/array_ref.h"
#include "base/bit_field.h"
#include "base/bit_utils.h"
#include "base/globals.h"
#include "base/macros.h"
#include "base/memory_region.h"
#include "base/pointer_size.h"
#include "class_root.h"
#include "dex/proto_reference.h"
#include "dex/string_reference.h"
#include "dex/type_reference.h"
#include "graph_visualizer.h"
#include "locations.h"
#include "mirror/method_type.h"
#include "nodes.h"
#include "oat/oat_quick_method_header.h"
#include "optimizing_compiler_stats.h"
#include "read_barrier_option.h"
#include "stack.h"
#include "subtype_check.h"
#include "utils/assembler.h"
#include "utils/label.h"

namespace art HIDDEN {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Minimum value for a primitive integer.
static int32_t constexpr kPrimIntMin = 0x80000000;
// Minimum value for a primitive long.
static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

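// The constants below describe how a class's initialization status is packed next to the
// SubtypeCheckBits in the 32-bit status field of mirror::Class. Backends use the byte offset
// and the pre-shifted status values when emitting inline class-initialization checks
// (e.g. comparing a loaded byte against the "visibly initialized" value on the fast path).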
constexpr size_t kClassStatusLsbPosition = SubtypeCheckBits::BitStructSizeOf();
constexpr size_t kClassStatusByteOffset =
    mirror::Class::StatusOffset().SizeValue() + (kClassStatusLsbPosition / kBitsPerByte);
constexpr uint32_t kShiftedVisiblyInitializedValue = enum_cast<uint32_t>(
    ClassStatus::kVisiblyInitialized) << (kClassStatusLsbPosition % kBitsPerByte);
constexpr uint32_t kShiftedInitializingValue =
    enum_cast<uint32_t>(ClassStatus::kInitializing) << (kClassStatusLsbPosition % kBitsPerByte);
constexpr uint32_t kShiftedInitializedValue =
    enum_cast<uint32_t>(ClassStatus::kInitialized) << (kClassStatusLsbPosition % kBitsPerByte);

class Assembler;
class CodeGenerationData;
class CodeGenerator;
class CompilerOptions;
class StackMapStream;
class ParallelMoveResolver;

namespace linker {
class LinkerPatch;
}  // namespace linker

class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
 public:
  explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  // Save live core and floating-point caller-save registers and
  // update the stack mask in `locations` for registers holding object
  // references.
  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  // Restore live core and floating-point caller-save registers.
  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

  virtual bool IsFatal() const { return false; }

  virtual const char* GetDescription() const = 0;

  Label* GetEntryLabel() { return &entry_label_; }
  Label* GetExitLabel() { return &exit_label_; }

  HInstruction* GetInstruction() const {
    return instruction_;
  }

  uint32_t GetDexPc() const {
    return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
  }

 protected:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  // The instruction where this slow path is happening.
  HInstruction* instruction_;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];

 private:
  Label entry_label_;
  Label exit_label_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
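
// A minimal sketch of how an architecture-specific backend typically subclasses SlowPathCode
// (the class name and the emitted sequence are illustrative, not an actual ART slow path):
//
//   class HypotheticalNullCheckSlowPathX86 : public SlowPathCode {
//    public:
//     explicit HypotheticalNullCheckSlowPathX86(HNullCheck* at) : SlowPathCode(at) {}
//     void EmitNativeCode(CodeGenerator* codegen) override {
//       __ Bind(GetEntryLabel());  // The fast path branches here on the exceptional case.
//       // Call the throwing runtime entrypoint; a fatal slow path never returns. A
//       // non-fatal one would SaveLiveRegisters(), call the runtime, RestoreLiveRegisters()
//       // and jump back to GetExitLabel().
//     }
//     bool IsFatal() const override { return true; }
//     const char* GetDescription() const override { return "HypotheticalNullCheckSlowPath"; }
//   };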

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(DataType::Type type) = 0;
  virtual Location GetReturnLocation(DataType::Type type) const = 0;
  virtual Location GetMethodLocation() const = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};

class FieldAccessCallingConvention {
 public:
  virtual Location GetObjectLocation() const = 0;
  virtual Location GetFieldIndexLocation() const = 0;
  virtual Location GetReturnLocation(DataType::Type type) const = 0;
  virtual Location GetSetValueLocation(DataType::Type type, bool is_instance) const = 0;
  virtual Location GetFpuLocation(DataType::Type type) const = 0;
  virtual ~FieldAccessCallingConvention() {}

 protected:
  FieldAccessCallingConvention() {}

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
};

class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
 public:
  // Compiles the graph to executable instructions.
  void Compile();
  static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
                                               const CompilerOptions& compiler_options,
                                               OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGenerator();

  // Get the graph. This is the outermost graph, never the graph of a method being inlined.
  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet()))  // Art method
        + parameter->GetIndex() * kVRegSize;
  }

  virtual void Initialize() = 0;
  virtual void Finalize();
  virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches);
  virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const;
  virtual void EmitThunkCode(const linker::LinkerPatch& patch,
                             /*out*/ ArenaVector<uint8_t>* code,
                             /*out*/ std::string* debug_name);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void MoveConstant(Location destination, int32_t value) = 0;
  virtual void MoveLocation(Location dst, Location src, DataType::Type dst_type) = 0;
  virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;

  virtual Assembler* GetAssembler() = 0;
  virtual const Assembler& GetAssembler() const = 0;
  virtual size_t GetWordSize() const = 0;

  // Returns whether the target supports predicated SIMD instructions.
  virtual bool SupportsPredicatedSIMD() const { return false; }

  // Get FP register width in bytes for spilling/restoring in the slow paths.
  //
  // Note: In SIMD graphs this should return SIMD register width as all FP and SIMD registers
  // alias and live SIMD registers are forced to be spilled in full size in the slow paths.
  virtual size_t GetSlowPathFPWidth() const {
    // Default implementation.
    return GetCalleePreservedFPWidth();
  }

  // Get FP register width required to be preserved by the target ABI.
  virtual size_t GetCalleePreservedFPWidth() const = 0;

  // Get the size of the target SIMD register in bytes.
  virtual size_t GetSIMDRegisterWidth() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_safepoint_spill_size,
                                size_t number_of_out_slots,
                                const ArenaVector<HBasicBlock*>& block_order);
  // Backends can override this as necessary. For most, no special alignment is required.
  virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetMaximumFrameSize() const {
    return GetStackOverflowReservedBytes(GetInstructionSet());
  }

  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters() const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  // Saves the register in the stack. Returns the size taken on stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(DataType::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  // Returns true if `invoke` is an implemented intrinsic in this codegen's arch.
  bool IsImplementedIntrinsic(HInvoke* invoke) const {
    return invoke->IsIntrinsic() &&
           !unimplemented_intrinsics_[static_cast<size_t>(invoke->GetIntrinsic())];
  }

  size_t GetNumberOfCoreCalleeSaveRegisters() const {
    return POPCOUNT(core_callee_save_mask_);
  }

  size_t GetNumberOfCoreCallerSaveRegisters() const {
    DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
    return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
  }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const {
    DCHECK(locations->OnlyCallsOnSlowPath() ||
           (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() &&
               !locations->HasCustomSlowPathCallingConvention()));
    uint32_t live_registers = core_registers
        ? locations->GetLiveRegisters()->GetCoreRegisters()
        : locations->GetLiveRegisters()->GetFloatingPointRegisters();
    if (locations->HasCustomSlowPathCallingConvention()) {
      // Save only the live registers that the custom calling convention wants us to save.
      uint32_t caller_saves = core_registers
          ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters()
          : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters();
      return live_registers & caller_saves;
    } else {
      // Default ABI, we need to spill non-callee-save live registers.
      uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_;
      return live_registers & ~callee_saves;
    }
  }

  size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const {
    return POPCOUNT(GetSlowPathSpills(locations, core_registers));
  }

  size_t GetStackOffsetOfShouldDeoptimizeFlag() const {
    DCHECK(GetGraph()->HasShouldDeoptimizeFlag());
    DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize);
    return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize;
  }

  // For stack overflow checks and native-debug-info entries without dex register
  // mapping i.e. start of basic block or at frame entry.
  void RecordPcInfoForFrameOrBlockEntry(uint32_t dex_pc = 0);

  // Record native to dex mapping for a suspend point.
  // The native_pc is used from Assembler::CodePosition.
  //
  // Note: As Assembler::CodePosition is target dependent, it does not guarantee the exact native_pc
  // for the instruction. If the exact native_pc is required it must be provided explicitly.
  void RecordPcInfo(HInstruction* instruction,
                    SlowPathCode* slow_path = nullptr,
                    bool native_debug_info = false);

  // Record native to dex mapping for a suspend point. Required by runtime.
  // Do not use directly. Use the method above.
  void RecordPcInfo(HInstruction* instruction,
                    uint32_t dex_pc,
                    uint32_t native_pc,
                    SlowPathCode* slow_path = nullptr,
                    bool native_debug_info = false);

  // Check whether we have already recorded mapping at this PC.
  bool HasStackMapAtCurrentPc();

  // Record extra stack maps if we support native debugging.
  //
  // ARM specific behaviour: The recorded native PC might be a branch over pools to instructions
  // corresponding to the dex PC.
  void MaybeRecordNativeDebugInfoForBlockEntry(uint32_t dex_pc);
  void MaybeRecordNativeDebugInfo(HInstruction* instruction,
                                  uint32_t dex_pc,
                                  SlowPathCode* slow_path = nullptr);

  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  virtual void MaybeRecordImplicitNullCheck(HInstruction* instruction);
  LocationSummary* CreateThrowingSlowPathLocations(
      HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty());
  void GenerateNullCheck(HNullCheck* null_check);
  virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
  virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;

  // Records a stack map which the runtime might use to set catch phi values
  // during exception delivery.
  // TODO: Replace with a catch-entering instruction that records the environment.
  void RecordCatchBlockInfo();

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }
  bool EmitReadBarrier() const;
  bool EmitBakerReadBarrier() const;
  bool EmitNonBakerReadBarrier() const;
  ReadBarrierOption GetCompilerReadBarrierOption() const;

  // Returns true if we should check the GC card for consistency purposes.
  bool ShouldCheckGCCard(DataType::Type type,
                         HInstruction* value,
                         WriteBarrierKind write_barrier_kind) const;

  // Get the ScopedArenaAllocator used for codegen memory allocation.
  ScopedArenaAllocator* GetScopedAllocator();

  void AddSlowPath(SlowPathCode* slow_path);

  ScopedArenaVector<uint8_t> BuildStackMaps(const dex::CodeItem* code_item_for_osr_check);
  size_t GetNumberOfJitRoots() const;

  // Fills the `literals` array with literals collected during code generation.
  // Also emits literal patches.
  void EmitJitRoots(uint8_t* code,
                    const uint8_t* roots_data,
                    /*out*/std::vector<Handle<mirror::Object>>* roots)
      REQUIRES_SHARED(Locks::mutator_lock_);

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  bool NeedsSuspendCheckEntry() const {
    return needs_suspend_check_entry_;
  }

  void MarkNeedsSuspendCheckEntry() {
    needs_suspend_check_entry_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check,
                                             HParallelMove* spills) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
  bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }

  // Helper that returns the offset of the array's length field.
  // Note: Besides the normal arrays, we also use the HArrayLength for
  // accessing the String's `count` field in String intrinsics.
  static uint32_t GetArrayLengthOffset(HArrayLength* array_length);

  // Helper that returns the offset of the array's data.
  // Note: Besides the normal arrays, we also use the HArrayGet for
  // accessing the String's `value` field in String intrinsics.
  static uint32_t GetArrayDataOffset(HArrayGet* array_get);

  void EmitParallelMoves(Location from1,
                         Location to1,
                         DataType::Type type1,
                         Location from2,
                         Location to2,
                         DataType::Type type2);

  bool InstanceOfNeedsReadBarrier(HInstanceOf* instance_of) {
    // Used only for `kExactCheck`, `kAbstractClassCheck`, `kClassHierarchyCheck`,
    // `kArrayObjectCheck` and `kInterfaceCheck`.
    DCHECK(instance_of->GetTypeCheckKind() == TypeCheckKind::kExactCheck ||
           instance_of->GetTypeCheckKind() == TypeCheckKind::kAbstractClassCheck ||
           instance_of->GetTypeCheckKind() == TypeCheckKind::kClassHierarchyCheck ||
           instance_of->GetTypeCheckKind() == TypeCheckKind::kArrayObjectCheck ||
           instance_of->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck)
        << instance_of->GetTypeCheckKind();
    // If the target class is in the boot or app image, it's non-moveable and it doesn't matter
    // if we compare it with a from-space or to-space reference, the result is the same.
    // It's OK to traverse a class hierarchy jumping between from-space and to-space.
    return EmitReadBarrier() && !instance_of->GetTargetClass()->IsInImage();
  }

  ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) {
    return InstanceOfNeedsReadBarrier(instance_of) ? kWithReadBarrier : kWithoutReadBarrier;
  }

  bool IsTypeCheckSlowPathFatal(HCheckCast* check_cast) {
    switch (check_cast->GetTypeCheckKind()) {
      case TypeCheckKind::kExactCheck:
      case TypeCheckKind::kAbstractClassCheck:
      case TypeCheckKind::kClassHierarchyCheck:
      case TypeCheckKind::kArrayObjectCheck:
      case TypeCheckKind::kInterfaceCheck: {
        bool needs_read_barrier =
            EmitReadBarrier() && !check_cast->GetTargetClass()->IsInImage();
        // We do not emit read barriers for HCheckCast, so we can get false negatives
        // and the slow path shall re-check and simply return if the cast is actually OK.
        return !needs_read_barrier;
      }
      case TypeCheckKind::kArrayCheck:
      case TypeCheckKind::kUnresolvedCheck:
        return false;
      case TypeCheckKind::kBitstringCheck:
        return true;
    }
    LOG(FATAL) << "Unreachable";
    UNREACHABLE();
  }

  LocationSummary::CallKind GetCheckCastCallKind(HCheckCast* check_cast) {
    return (IsTypeCheckSlowPathFatal(check_cast) && !check_cast->CanThrowIntoCatchBlock())
        ? LocationSummary::kNoCall  // In fact, call on a fatal (non-returning) slow path.
        : LocationSummary::kCallOnSlowPath;
  }

  static bool StoreNeedsWriteBarrier(DataType::Type type, HInstruction* value) {
    // Check that null value is not represented as an integer constant.
    DCHECK_IMPLIES(type == DataType::Type::kReference, !value->IsIntConstant());
    return type == DataType::Type::kReference && !value->IsNullConstant();
  }

  // If we are compiling a graph with the WBE pass enabled, we want to honor the WriteBarrierKind
  // set during the WBE pass.
  bool StoreNeedsWriteBarrier(DataType::Type type,
                              HInstruction* value,
                              WriteBarrierKind write_barrier_kind) const;

  // Performs checks pertaining to an InvokeRuntime call.
  void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             SlowPathCode* slow_path);

  // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
  static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
                                                          SlowPathCode* slow_path);

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  bool HasAllocatedRegister(bool is_core, int reg) const {
    return is_core
        ? allocated_registers_.ContainsCoreRegister(reg)
        : allocated_registers_.ContainsFloatingPointRegister(reg);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty", that is either actually having a size of zero,
  // or just containing the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }

  static int8_t GetInt8ValueOf(HConstant* constant) {
    DCHECK(constant->IsIntConstant());
    return constant->AsIntConstant()->GetValue();
  }

  static int16_t GetInt16ValueOf(HConstant* constant) {
    DCHECK(constant->IsIntConstant());
    return constant->AsIntConstant()->GetValue();
  }

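  // Returns the 32-bit representation of the constant: the integer value itself, 0 for a
  // null constant, or the raw IEEE-754 bits of a float constant (e.g. 1.0f -> 0x3f800000).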
  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

  static void CreateCommonInvokeLocationSummary(
      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

  template <typename CriticalNativeCallingConventionVisitor,
            size_t kNativeStackAlignment,
            size_t GetCriticalNativeDirectCallFrameSize(std::string_view shorty)>
  size_t PrepareCriticalNativeCall(HInvokeStaticOrDirect* invoke) {
      DCHECK(!invoke->GetLocations()->Intrinsified());
      CriticalNativeCallingConventionVisitor calling_convention_visitor(
          /*for_register_allocation=*/ false);
      HParallelMove parallel_move(GetGraph()->GetAllocator());
      PrepareCriticalNativeArgumentMoves(invoke, &calling_convention_visitor, &parallel_move);
      size_t out_frame_size =
          RoundUp(calling_convention_visitor.GetStackOffset(), kNativeStackAlignment);
      if (kIsDebugBuild) {
        std::string_view shorty = GetCriticalNativeShorty(invoke);
        CHECK_EQ(GetCriticalNativeDirectCallFrameSize(shorty), out_frame_size);
      }
      if (out_frame_size != 0u) {
        FinishCriticalNativeFrameSetup(out_frame_size, &parallel_move);
      }
      return out_frame_size;
  }

  void GenerateInvokeStaticOrDirectRuntimeCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path);

  void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);

  void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke, SlowPathCode* slow_path = nullptr);

  void GenerateInvokeCustomCall(HInvokeCustom* invoke);

  void CreateStringBuilderAppendLocations(HStringBuilderAppend* instruction, Location out);

  void CreateUnresolvedFieldLocationSummary(
      HInstruction* field_access,
      DataType::Type field_type,
      const FieldAccessCallingConvention& calling_convention);

  void GenerateUnresolvedFieldAccess(
      HInstruction* field_access,
      DataType::Type field_type,
      uint32_t field_index,
      const FieldAccessCallingConvention& calling_convention);

  static void CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
                                                        Location runtime_type_index_location,
                                                        Location runtime_return_location);
  void GenerateLoadClassRuntimeCall(HLoadClass* cls);

  static void CreateLoadMethodHandleRuntimeCallLocationSummary(HLoadMethodHandle* method_handle,
                                                             Location runtime_handle_index_location,
                                                             Location runtime_return_location);
  void GenerateLoadMethodHandleRuntimeCall(HLoadMethodHandle* method_handle);

  static void CreateLoadMethodTypeRuntimeCallLocationSummary(HLoadMethodType* method_type,
                                                             Location runtime_type_index_location,
                                                             Location runtime_return_location);
  void GenerateLoadMethodTypeRuntimeCall(HLoadMethodType* method_type);

  static uint32_t GetBootImageOffset(ObjPtr<mirror::Object> object)
      REQUIRES_SHARED(Locks::mutator_lock_);
  static uint32_t GetBootImageOffset(HLoadClass* load_class);
  static uint32_t GetBootImageOffset(HLoadString* load_string);
  static uint32_t GetBootImageOffset(HInvoke* invoke);
  static uint32_t GetBootImageOffset(ClassRoot class_root);
  static uint32_t GetBootImageOffsetOfIntrinsicDeclaringClass(HInvoke* invoke);

  static LocationSummary* CreateSystemArrayCopyLocationSummary(
      HInvoke* invoke, int32_t length_threshold = -1, size_t num_temps = 3);

  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }

  virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             SlowPathCode* slow_path = nullptr) = 0;

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) = 0;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) = 0;

  LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) {
    switch (load->GetLoadKind()) {
      case HLoadString::LoadKind::kBssEntry:
        DCHECK(load->NeedsEnvironment());
        return LocationSummary::kCallOnSlowPath;
      case HLoadString::LoadKind::kRuntimeCall:
        DCHECK(load->NeedsEnvironment());
        return LocationSummary::kCallOnMainOnly;
      case HLoadString::LoadKind::kJitTableAddress:
        DCHECK(!load->NeedsEnvironment());
        return EmitReadBarrier()
            ? LocationSummary::kCallOnSlowPath
            : LocationSummary::kNoCall;
      default:
        DCHECK(!load->NeedsEnvironment());
        return LocationSummary::kNoCall;
    }
  }

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method) = 0;

  // Generate a call to a static or direct method.
  virtual void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;
  // Generate a call to a virtual method.
  virtual void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;

  // Copy the result of a call into the given target.
  virtual void MoveFromReturnRegister(Location trg, DataType::Type type) = 0;

  virtual void IncreaseFrame(size_t adjustment) = 0;
  virtual void DecreaseFrame(size_t adjustment) = 0;

  virtual void GenerateNop() = 0;

  static QuickEntrypointEnum GetArrayAllocationEntrypoint(HNewArray* new_array);
  static ScaleFactor ScaleFactorForType(DataType::Type type);

  ArrayRef<const uint8_t> GetCode() const {
    return ArrayRef<const uint8_t>(GetAssembler().CodeBufferBaseAddress(),
                                   GetAssembler().CodeSize());
  }

 protected:
  // Patch info used for recording locations of required linker patches and their targets,
  // i.e. target method, string, type or code identified by their dex file and index,
  // or boot image .data.img.rel.ro entries identified by the boot image offset.
  template <typename LabelType>
  struct PatchInfo {
    PatchInfo(const DexFile* dex_file, uint32_t off_or_idx)
        : target_dex_file(dex_file), offset_or_index(off_or_idx), label() { }

    // Target dex file or null for boot image .data.img.rel.ro patches.
    const DexFile* target_dex_file;
    // Either the boot image offset (to write to .data.img.rel.ro) or string/type/method index.
    uint32_t offset_or_index;
    // Label for the instruction to patch.
    LabelType label;
  };

  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options,
                OptimizingCompilerStats* stats,
                const art::ArrayRef<const bool>& unimplemented_intrinsics);

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

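  // Builds a bit mask from a list of register numbers; e.g. registers {0, 1, 5}
  // yield the mask 0b100011.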
  template <typename RegType>
  static uint32_t ComputeRegisterMask(const RegType* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetCalleePreservedFPWidth();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  virtual bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because it always comprises the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
      || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == InstructionSet::kX86 || instruction_set == InstructionSet::kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize these methods
  // to share the logic.

  template <typename LabelType>
  LabelType* CommonInitializeLabels() {
    // We use raw array allocations instead of ArenaVector<> because Labels are
    // non-constructible and non-movable and as such cannot be held in a vector.
    size_t size = GetGraph()->GetBlocks().size();
    LabelType* labels =
        GetGraph()->GetAllocator()->AllocArray<LabelType>(size, kArenaAllocCodeGenerator);
    for (size_t i = 0; i != size; ++i) {
      new(labels + i) LabelType();
    }
    return labels;
  }

  template <typename LabelType>
  LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  SlowPathCode* GetCurrentSlowPath() {
    return current_slow_path_;
  }

  StackMapStream* GetStackMapStream();

  CodeGenerationData* GetCodeGenerationData() {
    return code_generation_data_.get();
  }

  void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string);
  uint64_t GetJitStringRootIndex(StringReference string_reference);
  void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass);
  uint64_t GetJitClassRootIndex(TypeReference type_reference);
  void ReserveJitMethodTypeRoot(ProtoReference proto_reference,
                                Handle<mirror::MethodType> method_type);
  uint64_t GetJitMethodTypeRootIndex(ProtoReference proto_reference);

  // Emit the patches associated with JIT roots. Only applies to JIT-compiled code.
  virtual void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data);

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  // The order to use for code generation.
  const ArenaVector<HBasicBlock*>* block_order_;

  DisassemblyInformation* disasm_info_;

 private:
  void InitializeCodeGenerationData();
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void GenerateSlowPaths();
  void BlockIfInRegister(Location location, bool is_out = false) const;
  void EmitEnvironment(HEnvironment* environment,
                       SlowPathCode* slow_path,
                       bool needs_vreg_info = true,
                       bool is_for_catch_handler = false,
                       bool innermost_environment = true);
  void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path, bool is_for_catch_handler);
  void EmitVRegInfoOnlyCatchPhis(HEnvironment* environment);

  static void PrepareCriticalNativeArgumentMoves(
      HInvokeStaticOrDirect* invoke,
      /*inout*/InvokeDexCallingConventionVisitor* visitor,
      /*out*/HParallelMove* parallel_move);

  void FinishCriticalNativeFrameSetup(size_t out_frame_size, /*inout*/HParallelMove* parallel_move);

  static std::string_view GetCriticalNativeShorty(HInvokeStaticOrDirect* invoke);

  OptimizingCompilerStats* stats_;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  // The current slow-path that we're generating code for.
  SlowPathCode* current_slow_path_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether the method has to emit a SuspendCheck at entry.
  bool needs_suspend_check_entry_;

  // Whether an instruction in the graph accesses the current method.
  // TODO: Rename: this actually indicates that some instruction in the method
  // needs the environment including a valid stack frame.
  bool requires_current_method_;

  // The CodeGenerationData contains a ScopedArenaAllocator intended for reusing the
  // ArenaStack memory allocated in previous passes instead of adding to the memory
  // held by the ArenaAllocator. This ScopedArenaAllocator is created in
  // CodeGenerator::Compile() and remains alive until the CodeGenerator is destroyed.
  std::unique_ptr<CodeGenerationData> code_generation_data_;

  // Which intrinsics we don't have handcrafted code for.
  art::ArrayRef<const bool> unimplemented_intrinsics_;

  friend class OptimizingCFITest;
  ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeSIMD);
  ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeNoSIMD);

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};
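
// A rough sketch of how the optimizing compiler drives a CodeGenerator, assumed from the
// API above and simplified (the exact call sites live in the compiler driver and the
// register allocator):
//
//   std::unique_ptr<CodeGenerator> codegen =
//       CodeGenerator::Create(graph, compiler_options, stats);
//   codegen->Initialize();                 // Backend-specific setup, e.g. block labels.
//   // ... optimizations and register allocation run here; the allocator supplies the
//   // spill/out slot counts to InitializeCodeGeneration() ...
//   codegen->Compile();                    // Emits frame entry, blocks, and slow paths.
//   ScopedArenaVector<uint8_t> stack_maps = codegen->BuildStackMaps(code_item);
//   ArrayRef<const uint8_t> code = codegen->GetCode();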

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    PointerSize pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
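    // For example, with a 64-bit method pointer, the argument at index 2 lives at
    // 8 + 2 * kVRegSize = 16 bytes from the base of the outgoing arguments area.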
    return static_cast<size_t>(pointer_size_) + index * kVRegSize;
  }

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const PointerSize pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

/**
 * A templated class SlowPathGenerator with a templated method NewSlowPath()
 * that can be used by any code generator to share equivalent slow-paths with
 * the objective of reducing generated code size.
 *
 * InstructionType:  instruction that requires SlowPathCodeType
 * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
 */
template <typename InstructionType>
class SlowPathGenerator {
  static_assert(std::is_base_of<HInstruction, InstructionType>::value,
                "InstructionType is not a subclass of art::HInstruction");

 public:
  SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
      : graph_(graph),
        codegen_(codegen),
        slow_path_map_(std::less<uint32_t>(),
                       graph->GetAllocator()->Adapter(kArenaAllocSlowPaths)) {}

  // Creates and adds a new slow-path, if needed, or returns existing one otherwise.
  // Templating the method (rather than the whole class) on the slow-path type enables
  // keeping this code at a generic, non architecture-specific place.
  //
  // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
  //       To relax this requirement, we would need some RTTI on the stored slow-paths,
  //       or template the class as a whole on SlowPathType.
  template <typename SlowPathCodeType>
  SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
    static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
                  "SlowPathCodeType is not a subclass of art::SlowPathCode");
    static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
                  "SlowPathCodeType is not constructible from InstructionType*");
    // Iterate over potential candidates for sharing. Currently, only same-typed
    // slow-paths with exactly the same dex-pc are viable candidates.
    // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
    const uint32_t dex_pc = instruction->GetDexPc();
    auto iter = slow_path_map_.find(dex_pc);
    if (iter != slow_path_map_.end()) {
      const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second;
      for (const auto& it : candidates) {
        InstructionType* other_instruction = it.first;
        SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
        // Determine if the instructions allow for slow-path sharing.
        if (HaveSameLiveRegisters(instruction, other_instruction) &&
            HaveSameStackMap(instruction, other_instruction)) {
          // Can share: reuse existing one.
          return other_slow_path;
        }
      }
    } else {
      // First time this dex-pc is seen.
      iter = slow_path_map_.Put(dex_pc,
                                {{}, {graph_->GetAllocator()->Adapter(kArenaAllocSlowPaths)}});
    }
    // Cannot share: create and add new slow-path for this particular dex-pc.
    SlowPathCodeType* slow_path =
        new (codegen_->GetScopedAllocator()) SlowPathCodeType(instruction);
    iter->second.emplace_back(std::make_pair(instruction, slow_path));
    codegen_->AddSlowPath(slow_path);
    return slow_path;
  }

 private:
  // Tests if both instructions have same set of live physical registers. This ensures
  // the slow-path has exactly the same preamble on saving these registers to stack.
  bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
    const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
    const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
    RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
    RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
    return (((live1->GetCoreRegisters() & core_spill) ==
             (live2->GetCoreRegisters() & core_spill)) &&
            ((live1->GetFloatingPointRegisters() & fpu_spill) ==
             (live2->GetFloatingPointRegisters() & fpu_spill)));
  }

  // Tests if both instructions have the same stack map. This ensures the interpreter
  // will find exactly the same dex-registers at the same entries.
  bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
    DCHECK(i1->HasEnvironment());
    DCHECK(i2->HasEnvironment());
    // We conservatively test if the two instructions find exactly the same instructions
    // and location in each dex-register. This guarantees they will have the same stack map.
    HEnvironment* e1 = i1->GetEnvironment();
    HEnvironment* e2 = i2->GetEnvironment();
    if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
      return false;
    }
    for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
      if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
          !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
        return false;
      }
    }
    return true;
  }

  HGraph* const graph_;
  CodeGenerator* const codegen_;

  // Map from dex-pc to vector of already existing instruction/slow-path pairs.
  ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
};

class InstructionCodeGenerator : public HGraphVisitor {
 public:
  InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
      : HGraphVisitor(graph),
        deopt_slow_paths_(graph, codegen) {}

 protected:
  // Add slow-path generator for each instruction/slow-path combination that desires sharing.
  // TODO: under the current regime, only deopt sharing makes sense; extend later.
  SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
};
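
// A sketch of how a backend's instruction visitor might use the shared generator above
// (the slow-path class name is hypothetical):
//
//   void InstructionCodeGeneratorSomeArch::VisitDeoptimize(HDeoptimize* deopt) {
//     SlowPathCode* slow_path =
//         deopt_slow_paths_.NewSlowPath<HypotheticalDeoptSlowPath>(deopt);
//     // Emit a test of the deopt condition and branch to slow_path->GetEntryLabel() when
//     // it holds; equivalent deopts at the same dex PC reuse a single slow path.
//   }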

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_