/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
#include "base/bit_utils.h"
#include "base/enums.h"
#include "globals.h"
#include "graph_visualizer.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "optimizing_compiler_stats.h"
#include "read_barrier_option.h"
#include "stack.h"
#include "stack_map_stream.h"
#include "string_reference.h"
#include "type_reference.h"
#include "utils/label.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);
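
// (A quick worked check of these encodings: an IEEE-754 double with value 2^n
// has a zero mantissa and an exponent field of 1023 + n, so 2^32 encodes as
// 0x41F0000000000000 and 2^31 as 0x41E0000000000000.)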

// Minimum value for a primitive integer.
static int32_t constexpr kPrimIntMin = 0x80000000;
// Minimum value for a primitive long.
static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

static constexpr ReadBarrierOption kCompilerReadBarrierOption =
    kEmitCompilerReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;

class Assembler;
class CodeGenerator;
class CompilerDriver;
class CompilerOptions;
class LinkerPatch;
class ParallelMoveResolver;

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};
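
// A minimal sketch of a concrete allocator (illustrative only; the class name
// and vector-backed storage are assumptions, as real backends supply their own
// implementations):
//
//   class SketchCodeAllocator : public CodeAllocator {
//    public:
//     uint8_t* Allocate(size_t size) OVERRIDE {
//       memory_.resize(size);  // Grow the buffer to the requested code size.
//       return memory_.data();
//     }
//
//    private:
//     std::vector<uint8_t> memory_;
//   };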

class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
 public:
  explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  // Save live core and floating-point caller-save registers and
  // update the stack mask in `locations` for registers holding object
  // references.
  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  // Restore live core and floating-point caller-save registers.
  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

  virtual bool IsFatal() const { return false; }

  virtual const char* GetDescription() const = 0;

  Label* GetEntryLabel() { return &entry_label_; }
  Label* GetExitLabel() { return &exit_label_; }

  HInstruction* GetInstruction() const {
    return instruction_;
  }

  uint32_t GetDexPc() const {
    return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
  }

 protected:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  // The instruction where this slow path is happening.
  HInstruction* instruction_;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];

 private:
  Label entry_label_;
  Label exit_label_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
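
// A minimal sketch of a concrete slow path (illustrative only; each backend
// defines its own subclasses, and the emission details are elided here):
//
//   class SketchNullCheckSlowPath : public SlowPathCode {
//    public:
//     explicit SketchNullCheckSlowPath(HNullCheck* instruction)
//         : SlowPathCode(instruction) {}
//
//     void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
//       // Bind GetEntryLabel(), then call the runtime to throw the exception.
//     }
//
//     bool IsFatal() const OVERRIDE { return true; }  // Throws; never returns.
//     const char* GetDescription() const OVERRIDE { return "SketchNullCheckSlowPath"; }
//   };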

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(Primitive::Type type) = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetMethodLocation() const = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};
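
// Usage sketch: when building a call's LocationSummary, a backend walks the
// arguments and asks the visitor for one location per parameter type (this is
// roughly what CreateCommonInvokeLocationSummary() below does):
//
//   for (size_t i = 0; i < invoke->GetNumberOfArguments(); ++i) {
//     locations->SetInAt(i, visitor->GetNextLocation(invoke->InputAt(i)->GetType()));
//   }
//   locations->SetOut(visitor->GetReturnLocation(invoke->GetType()));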

class FieldAccessCallingConvention {
 public:
  virtual Location GetObjectLocation() const = 0;
  virtual Location GetFieldIndexLocation() const = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetSetValueLocation(Primitive::Type type, bool is_instance) const = 0;
  virtual Location GetFpuLocation(Primitive::Type type) const = 0;
  virtual ~FieldAccessCallingConvention() {}

 protected:
  FieldAccessCallingConvention() {}

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
};

class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
 public:
  // Compiles the graph to executable instructions.
  void Compile(CodeAllocator* allocator);
  static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
                                               InstructionSet instruction_set,
                                               const InstructionSetFeatures& isa_features,
                                               const CompilerOptions& compiler_options,
                                               OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGenerator() {}

  // Get the graph. This is the outermost graph, never the graph of a method being inlined.
  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet()))  // Art method
        + parameter->GetIndex() * kVRegSize;
  }

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void MoveConstant(Location destination, int32_t value) = 0;
  virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0;
  virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;

  virtual Assembler* GetAssembler() = 0;
  virtual const Assembler& GetAssembler() const = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_safepoint_spill_size,
                                size_t number_of_out_slots,
                                const ArenaVector<HBasicBlock*>& block_order);
  // Backends can override this as necessary. For most, no special alignment is required.
  virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters() const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }
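
  // For example, for registers {0, 2, 5} the mask above is
  // (1 << 0) | (1 << 2) | (1 << 5) = 0b100101 = 0x25.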

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;

  // Saves the register on the stack. Returns the size taken on the stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on the stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  size_t GetNumberOfCoreCalleeSaveRegisters() const {
    return POPCOUNT(core_callee_save_mask_);
  }

  size_t GetNumberOfCoreCallerSaveRegisters() const {
    DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
    return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
  }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const {
    DCHECK(locations->OnlyCallsOnSlowPath() ||
           (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() &&
               !locations->HasCustomSlowPathCallingConvention()));
    uint32_t live_registers = core_registers
        ? locations->GetLiveRegisters()->GetCoreRegisters()
        : locations->GetLiveRegisters()->GetFloatingPointRegisters();
    if (locations->HasCustomSlowPathCallingConvention()) {
      // Save only the live registers that the custom calling convention wants us to save.
      uint32_t caller_saves = core_registers
          ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters()
          : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters();
      return live_registers & caller_saves;
    } else {
      // Default ABI, we need to spill non-callee-save live registers.
      uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_;
      return live_registers & ~callee_saves;
    }
  }

  size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const {
    return POPCOUNT(GetSlowPathSpills(locations, core_registers));
  }

  size_t GetStackOffsetOfShouldDeoptimizeFlag() const {
    DCHECK(GetGraph()->HasShouldDeoptimizeFlag());
    DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize);
    return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize;
  }

  // Record native to dex mapping for a suspend point. Required by runtime.
  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  // Check whether we have already recorded mapping at this PC.
  bool HasStackMapAtCurrentPc();
  // Record extra stack maps if we support native debugging.
  void MaybeRecordNativeDebugInfo(HInstruction* instruction,
                                  uint32_t dex_pc,
                                  SlowPathCode* slow_path = nullptr);

  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);
  LocationSummary* CreateThrowingSlowPathLocations(
      HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty());
  void GenerateNullCheck(HNullCheck* null_check);
  virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
  virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;

  // Records a stack map which the runtime might use to set catch phi values
  // during exception delivery.
  // TODO: Replace with a catch-entering instruction that records the environment.
  void RecordCatchBlockInfo();

  // TODO: Avoid creating the `std::unique_ptr` here.
  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
  }

  void BuildStackMaps(MemoryRegion stack_map_region,
                      MemoryRegion method_info_region,
                      const DexFile::CodeItem& code_item);
  void ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, size_t* method_info_size);
  size_t GetNumberOfJitRoots() const {
    return jit_string_roots_.size() + jit_class_roots_.size();
  }

  // Fills the `literals` array with literals collected during code generation.
  // Also emits literal patches.
  void EmitJitRoots(uint8_t* code,
                    Handle<mirror::ObjectArray<mirror::Object>> roots,
                    const uint8_t* roots_data)
      REQUIRES_SHARED(Locks::mutator_lock_);

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
  bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);
  // Pointer variant for ArtMethod and ArtField arrays.
  size_t GetCachePointerOffset(uint32_t index);

  // Helper that returns the offset of the array's length field.
  // Note: Besides the normal arrays, we also use the HArrayLength for
  // accessing the String's `count` field in String intrinsics.
  static uint32_t GetArrayLengthOffset(HArrayLength* array_length);

  // Helper that returns the offset of the array's data.
  // Note: Besides the normal arrays, we also use the HArrayGet for
  // accessing the String's `value` field in String intrinsics.
  static uint32_t GetArrayDataOffset(HArrayGet* array_get);

  void EmitParallelMoves(Location from1,
                         Location to1,
                         Primitive::Type type1,
                         Location from2,
                         Location to2,
                         Primitive::Type type2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    // Check that null value is not represented as an integer constant.
    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    return type == Primitive::kPrimNot && !value->IsNullConstant();
  }
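
  // For example, an HArraySet storing a reference into an Object[] needs a
  // write barrier so the GC notices the new reference, whereas storing the
  // null constant (or any primitive value) does not.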

  // Performs checks pertaining to an InvokeRuntime call.
  void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             SlowPathCode* slow_path);

  // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
  static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
                                                          SlowPathCode* slow_path);

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  bool HasAllocatedRegister(bool is_core, int reg) const {
    return is_core
        ? allocated_registers_.ContainsCoreRegister(reg)
        : allocated_registers_.ContainsFloatingPointRegister(reg);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty", that is, it either has a size of zero or
  // contains only the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }
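
  // For example, GetInt64ValueOf() on a double constant holding 2.0 returns
  // its raw IEEE-754 bit pattern, INT64_C(0x4000000000000000).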

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

  static void CreateCommonInvokeLocationSummary(
      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

  void GenerateInvokeStaticOrDirectRuntimeCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path);
  void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);

  void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke);

  void CreateUnresolvedFieldLocationSummary(
      HInstruction* field_access,
      Primitive::Type field_type,
      const FieldAccessCallingConvention& calling_convention);

  void GenerateUnresolvedFieldAccess(
      HInstruction* field_access,
      Primitive::Type field_type,
      uint32_t field_index,
      uint32_t dex_pc,
      const FieldAccessCallingConvention& calling_convention);

  static void CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
                                                        Location runtime_type_index_location,
                                                        Location runtime_return_location);
  void GenerateLoadClassRuntimeCall(HLoadClass* cls);

  static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);

  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }

  virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             uint32_t dex_pc,
                             SlowPathCode* slow_path = nullptr) = 0;

  // Check if the desired_string_load_kind is supported. If it is, return it;
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) = 0;

  // Check if the desired_class_load_kind is supported. If it is, return it;
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) = 0;

  static LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) {
    switch (load->GetLoadKind()) {
      case HLoadString::LoadKind::kBssEntry:
        DCHECK(load->NeedsEnvironment());
        return LocationSummary::kCallOnSlowPath;
      case HLoadString::LoadKind::kRuntimeCall:
        DCHECK(load->NeedsEnvironment());
        return LocationSummary::kCallOnMainOnly;
      case HLoadString::LoadKind::kJitTableAddress:
        DCHECK(!load->NeedsEnvironment());
        return kEmitCompilerReadBarrier
            ? LocationSummary::kCallOnSlowPath
            : LocationSummary::kNoCall;
      default:
        DCHECK(!load->NeedsEnvironment());
        return LocationSummary::kNoCall;
    }
  }

  // Check if the desired_dispatch_info is supported. If it is, return it;
  // otherwise return a fall-back info that should be used instead.
  virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      HInvokeStaticOrDirect* invoke) = 0;

  // Generate a call to a static or direct method.
  virtual void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;
  // Generate a call to a virtual method.
  virtual void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;

  // Copy the result of a call into the given target.
  virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;

  virtual void GenerateNop() = 0;

  static QuickEntrypointEnum GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass);

 protected:
  // Patch info used for recording locations of required linker patches and their targets,
  // i.e. target method, string, type or code identified by their dex file and index.
  template <typename LabelType>
  struct PatchInfo {
    PatchInfo(const DexFile& target_dex_file, uint32_t target_index)
        : dex_file(target_dex_file), index(target_index) { }

    const DexFile& dex_file;
    uint32_t index;
    LabelType label;
  };

  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options,
                OptimizingCompilerStats* stats)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        allocated_registers_(RegisterSet::Empty()),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers,
                                                                    kArenaAllocCodeGenerator)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers,
                                                                   kArenaAllocCodeGenerator)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        stack_map_stream_(graph->GetArena(), graph->GetInstructionSet()),
        block_order_(nullptr),
        jit_string_roots_(StringReferenceValueComparator(),
                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        jit_class_roots_(TypeReferenceValueComparator(),
                         graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        disasm_info_(nullptr),
        stats_(stats),
        graph_(graph),
        compiler_options_(compiler_options),
        slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        current_slow_path_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false) {
    slow_paths_.reserve(8);
  }

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  virtual bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because it always comprises the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
      || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize these methods
  // to share the logic.

  template <typename LabelType>
  LabelType* CommonInitializeLabels() {
    // We use raw array allocations instead of ArenaVector<> because Labels are
    // non-constructible and non-movable and as such cannot be held in a vector.
    size_t size = GetGraph()->GetBlocks().size();
    LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
                                                                      kArenaAllocCodeGenerator);
    for (size_t i = 0; i != size; ++i) {
      new(labels + i) LabelType();
    }
    return labels;
  }

  template <typename LabelType>
  LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  SlowPathCode* GetCurrentSlowPath() {
    return current_slow_path_;
  }

  // Emit the patches associated with JIT roots. Only applies to JIT-compiled code.
  virtual void EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED,
                                  const uint8_t* roots_data ATTRIBUTE_UNUSED) {
    DCHECK_EQ(jit_string_roots_.size(), 0u);
    DCHECK_EQ(jit_class_roots_.size(), 0u);
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  StackMapStream stack_map_stream_;

  // The order to use for code generation.
  const ArenaVector<HBasicBlock*>* block_order_;

  // Maps a StringReference (dex_file, string_index) to the index in the literal table.
  // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots`
  // will compute all the indices.
  ArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_;

  // Maps a ClassReference (dex_file, type_index) to the index in the literal table.
  // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots`
  // will compute all the indices.
  ArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_;

  DisassemblyInformation* disasm_info_;

 private:
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void GenerateSlowPaths();
  void BlockIfInRegister(Location location, bool is_out = false) const;
  void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);

  OptimizingCompilerStats* stats_;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  ArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;

  // The current slow-path that we're generating code for.
  SlowPathCode* current_slow_path_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  // TODO: Rename: this actually indicates that some instruction in the method
  // needs the environment including a valid stack frame.
  bool requires_current_method_;

  friend class OptimizingCFITest;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    PointerSize pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return static_cast<size_t>(pointer_size_) + index * kVRegSize;
  }

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const PointerSize pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};
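
// Backends instantiate this template with their register enums. A hypothetical
// sketch for an arm-like target (register names and counts are assumptions,
// chosen for illustration only):
//
//   static constexpr Register kParameterCoreRegisters[] = { R1, R2, R3 };
//   static constexpr SRegister kParameterFpuRegisters[] = { S0, S1, S2, S3 };
//
//   CallingConvention<Register, SRegister> conv(kParameterCoreRegisters,
//                                               arraysize(kParameterCoreRegisters),
//                                               kParameterFpuRegisters,
//                                               arraysize(kParameterFpuRegisters),
//                                               kArmPointerSize);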

/**
 * A templated class SlowPathGenerator with a templated method NewSlowPath()
 * that can be used by any code generator to share equivalent slow-paths with
 * the objective of reducing generated code size.
 *
 * InstructionType:  instruction that requires SlowPathCodeType
 * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
 */
template <typename InstructionType>
class SlowPathGenerator {
  static_assert(std::is_base_of<HInstruction, InstructionType>::value,
                "InstructionType is not a subclass of art::HInstruction");

 public:
  SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
      : graph_(graph),
        codegen_(codegen),
        slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}

  // Creates and adds a new slow-path, if needed, or returns an existing one otherwise.
  // Templating the method (rather than the whole class) on the slow-path type enables
  // keeping this code in a generic, non-architecture-specific place.
  //
  // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
  //       To relax this requirement, we would need some RTTI on the stored slow-paths,
  //       or template the class as a whole on SlowPathType.
  template <typename SlowPathCodeType>
  SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
    static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
                  "SlowPathCodeType is not a subclass of art::SlowPathCode");
    static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
                  "SlowPathCodeType is not constructible from InstructionType*");
    // Iterate over potential candidates for sharing. Currently, only same-typed
    // slow-paths with exactly the same dex-pc are viable candidates.
    // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
    const uint32_t dex_pc = instruction->GetDexPc();
    auto iter = slow_path_map_.find(dex_pc);
    if (iter != slow_path_map_.end()) {
      const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second;
      for (const auto& it : candidates) {
        InstructionType* other_instruction = it.first;
        SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
        // Determine if the instructions allow for slow-path sharing.
        if (HaveSameLiveRegisters(instruction, other_instruction) &&
            HaveSameStackMap(instruction, other_instruction)) {
          // Can share: reuse existing one.
          return other_slow_path;
        }
      }
    } else {
      // First time this dex-pc is seen.
      iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
    }
    // Cannot share: create and add new slow-path for this particular dex-pc.
    SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
    iter->second.emplace_back(std::make_pair(instruction, slow_path));
    codegen_->AddSlowPath(slow_path);
    return slow_path;
  }

 private:
  // Tests if both instructions have the same set of live physical registers. This
  // ensures the slow-path has exactly the same preamble for saving these registers
  // to the stack.
  bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
    const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
    const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
    RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
    RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
    return (((live1->GetCoreRegisters() & core_spill) ==
             (live2->GetCoreRegisters() & core_spill)) &&
            ((live1->GetFloatingPointRegisters() & fpu_spill) ==
             (live2->GetFloatingPointRegisters() & fpu_spill)));
  }

  // Tests if both instructions have the same stack map. This ensures the interpreter
  // will find exactly the same dex-registers at the same entries.
  bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
    DCHECK(i1->HasEnvironment());
    DCHECK(i2->HasEnvironment());
    // We conservatively test if the two instructions find exactly the same instructions
    // and location in each dex-register. This guarantees they will have the same stack map.
    HEnvironment* e1 = i1->GetEnvironment();
    HEnvironment* e2 = i2->GetEnvironment();
    if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
      return false;
    }
    for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
      if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
          !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
        return false;
      }
    }
    return true;
  }

  HGraph* const graph_;
  CodeGenerator* const codegen_;

  // Map from dex-pc to vector of already existing instruction/slow-path pairs.
  ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
};
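
// Usage sketch (names hypothetical): a backend holding a
// SlowPathGenerator<HSuspendCheck> can request a shared slow path with
//
//   SlowPathCode* path =
//       suspend_slow_paths.NewSlowPath<SketchSuspendCheckSlowPath>(check);
//
// Requests at the same dex-pc with matching live registers and stack maps get
// the same SlowPathCode back, so the code is emitted only once.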

class InstructionCodeGenerator : public HGraphVisitor {
 public:
  InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
      : HGraphVisitor(graph),
        deopt_slow_paths_(graph, codegen) {}

 protected:
  // Add slow-path generator for each instruction/slow-path combination that desires sharing.
  // TODO: under the current regime, only deopt sharing makes sense; extend later.
  SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_