/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "base/macros.h"
#include "code_generator.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/x86_64/assembler_x86_64.h"

namespace art HIDDEN {
namespace x86_64 {

static constexpr Register kMethodRegisterArgument = RDI;

// Use a local definition to prevent copying mistakes.
static constexpr size_t kX86_64WordSize = static_cast<size_t>(kX86_64PointerSize);

// Some x86_64 instructions require a register to be available as temp.
static constexpr Register TMP = R11;

static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 };
static constexpr FloatRegister kParameterFloatRegisters[] =
    { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 };

static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters);

static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX, RCX };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
    arraysize(kRuntimeParameterCoreRegisters);
static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
    arraysize(kRuntimeParameterFpuRegisters);

// These XMM registers are non-volatile in ART ABI, but volatile in native ABI.
// If the ART ABI changes, this list must be updated.  It is used to ensure that
// these are not clobbered by any direct call to native code (such as math intrinsics).
static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 };
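
// Code paths that emit a direct call to native code are expected to reserve the registers
// listed above so the register allocator does not keep live values in them across the call.
// A minimal, purely illustrative sketch of what an intrinsic's locations builder could do
// (assuming the usual LocationSummary API; BlockNonVolatileXmmRegisters() declared further
// below serves this purpose):
//
//   for (FloatRegister reg : non_volatile_xmm_regs) {
//     locations->AddTemp(Location::FpuRegisterLocation(reg));
//   }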

#define UNIMPLEMENTED_INTRINSIC_LIST_X86_64(V) \
  V(MathSignumFloat)                           \
  V(MathSignumDouble)                          \
  V(MathCopySignFloat)                         \
  V(MathCopySignDouble)                        \
  V(CRC32Update)                               \
  V(CRC32UpdateBytes)                          \
  V(CRC32UpdateByteBuffer)                     \
  V(FP16ToFloat)                               \
  V(FP16ToHalf)                                \
  V(FP16Floor)                                 \
  V(FP16Ceil)                                  \
  V(FP16Rint)                                  \
  V(FP16Greater)                               \
  V(FP16GreaterEquals)                         \
  V(FP16Less)                                  \
  V(FP16LessEquals)                            \
  V(FP16Compare)                               \
  V(FP16Min)                                   \
  V(FP16Max)                                   \
  V(IntegerRemainderUnsigned)                  \
  V(LongRemainderUnsigned)                     \
  V(StringStringIndexOf)                       \
  V(StringStringIndexOfAfter)                  \
  V(StringBufferAppend)                        \
  V(StringBufferLength)                        \
  V(StringBufferToString)                      \
  V(StringBuilderAppendObject)                 \
  V(StringBuilderAppendString)                 \
  V(StringBuilderAppendCharSequence)           \
  V(StringBuilderAppendCharArray)              \
  V(StringBuilderAppendBoolean)                \
  V(StringBuilderAppendChar)                   \
  V(StringBuilderAppendInt)                    \
  V(StringBuilderAppendLong)                   \
  V(StringBuilderAppendFloat)                  \
  V(StringBuilderAppendDouble)                 \
  V(StringBuilderLength)                       \
  V(StringBuilderToString)                     \
  V(UnsafeArrayBaseOffset)                     \
  /* 1.8 */                                    \
  V(JdkUnsafeArrayBaseOffset)                  \
  V(MethodHandleInvoke)                        \

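// The list above is an "X macro": a client supplies the V(...) definition and then expands the
// list to generate one item of code per unimplemented intrinsic. A purely illustrative sketch
// (the macro name MARK_UNIMPLEMENTED is made up for this example):
//
//   #define MARK_UNIMPLEMENTED(Name) /* e.g. record that Name falls back to the runtime */
//   UNIMPLEMENTED_INTRINSIC_LIST_X86_64(MARK_UNIMPLEMENTED)
//   #undef MARK_UNIMPLEMENTED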

class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
 public:
  InvokeRuntimeCallingConvention()
      : CallingConvention(kRuntimeParameterCoreRegisters,
                          kRuntimeParameterCoreRegistersLength,
                          kRuntimeParameterFpuRegisters,
                          kRuntimeParameterFpuRegistersLength,
                          kX86_64PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
};

class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> {
 public:
  InvokeDexCallingConvention() : CallingConvention(
      kParameterCoreRegisters,
      kParameterCoreRegistersLength,
      kParameterFloatRegisters,
      kParameterFloatRegistersLength,
      kX86_64PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};
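
// For reference, a sketch of how managed (dex) arguments map onto this convention: the callee's
// ArtMethod* is passed in kMethodRegisterArgument (RDI), core-typed arguments take RSI, RDX,
// RCX, R8, R9 in order, and floating-point arguments take XMM0..XMM7, with any remainder going
// to the stack. For example, a call to a static method
//
//   static int f(int a, long b, float c, double d)
//
// would be expected to pass the ArtMethod* in RDI, a in RSI, b in RDX, c in XMM0 and d in XMM1.
// (Illustrative only; the authoritative mapping is GetNextLocation() in
// InvokeDexCallingConventionVisitorX86_64.)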

class CriticalNativeCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
 public:
  explicit CriticalNativeCallingConventionVisitorX86_64(bool for_register_allocation)
      : for_register_allocation_(for_register_allocation) {}

  virtual ~CriticalNativeCallingConventionVisitorX86_64() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

  size_t GetStackOffset() const { return stack_offset_; }

 private:
  // Register allocator does not support adjusting frame size, so we cannot provide final locations
  // of stack arguments for register allocation. We ask the register allocator for any location and
  // move these arguments to the right place after adjusting the SP when generating the call.
  const bool for_register_allocation_;
  size_t gpr_index_ = 0u;
  size_t fpr_index_ = 0u;
  size_t stack_offset_ = 0u;

  DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorX86_64);
};

class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention {
 public:
  FieldAccessCallingConventionX86_64() {}

  Location GetObjectLocation() const override {
    return Location::RegisterLocation(RSI);
  }
  Location GetFieldIndexLocation() const override {
    return Location::RegisterLocation(RDI);
  }
  Location GetReturnLocation([[maybe_unused]] DataType::Type type) const override {
    return Location::RegisterLocation(RAX);
  }
  Location GetSetValueLocation([[maybe_unused]] DataType::Type type,
                               bool is_instance) const override {
    return is_instance
        ? Location::RegisterLocation(RDX)
        : Location::RegisterLocation(RSI);
  }
  Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override {
    return Location::FpuRegisterLocation(XMM0);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86_64);
};


class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
 public:
  InvokeDexCallingConventionVisitorX86_64() {}
  virtual ~InvokeDexCallingConventionVisitorX86_64() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

 private:
  InvokeDexCallingConvention calling_convention;

  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86_64);
};

class CodeGeneratorX86_64;

class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap {
 public:
  ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen)
      : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}

  void EmitMove(size_t index) override;
  void EmitSwap(size_t index) override;
  void SpillScratch(int reg) override;
  void RestoreScratch(int reg) override;

  X86_64Assembler* GetAssembler() const;

 private:
  void Exchange32(CpuRegister reg, int mem);
  void Exchange32(XmmRegister reg, int mem);
  void Exchange64(CpuRegister reg1, CpuRegister reg2);
  void Exchange64(CpuRegister reg, int mem);
  void Exchange64(XmmRegister reg, int mem);
  void Exchange128(XmmRegister reg, int mem);
  void ExchangeMemory32(int mem1, int mem2);
  void ExchangeMemory64(int mem1, int mem2, int num_of_qwords);

  CodeGeneratorX86_64* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86_64);
};

class LocationsBuilderX86_64 : public HGraphVisitor {
 public:
  LocationsBuilderX86_64(HGraph* graph, CodeGeneratorX86_64* codegen)
      : HGraphVisitor(graph), codegen_(codegen) {}

#define DECLARE_VISIT_INSTRUCTION(name, super)     \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION
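  // DECLARE_VISIT_INSTRUCTION above expands to one declaration per concrete HIR instruction,
  // for example (illustrative expansion for the Add instruction):
  //
  //   void VisitAdd(HAdd* instr) override;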

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

 private:
  void HandleInvoke(HInvoke* invoke);
  void HandleBitwiseOperation(HBinaryOperation* operation);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* operation);
  void HandleRotate(HBinaryOperation* rotate);
  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      WriteBarrierKind write_barrier_kind);
  void HandleFieldGet(HInstruction* instruction);
  bool CpuHasAvxFeatureFlag();
  bool CpuHasAvx2FeatureFlag();

  CodeGeneratorX86_64* const codegen_;
  InvokeDexCallingConventionVisitorX86_64 parameter_visitor_;

  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
};

class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
 public:
  InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen);

#define DECLARE_VISIT_INSTRUCTION(name, super)     \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

  X86_64Assembler* GetAssembler() const { return assembler_; }

  // Generate a GC root reference load:
  //
  //   root <- *address
  //
  // while honoring read barriers based on read_barrier_option.
  void GenerateGcRootFieldLoad(HInstruction* instruction,
                               Location root,
                               const Address& address,
                               Label* fixup_label,
                               ReadBarrierOption read_barrier_option);
  void HandleFieldSet(HInstruction* instruction,
                      uint32_t value_index,
                      uint32_t extra_temp_index,
                      DataType::Type field_type,
                      Address field_addr,
                      CpuRegister base,
                      bool is_volatile,
                      bool is_atomic,
                      bool value_can_be_null,
                      bool byte_swap,
                      WriteBarrierKind write_barrier_kind);

  void Bswap(Location value, DataType::Type type, CpuRegister* temp = nullptr);

 private:
  // Generate code for the given suspend check. If not null, `successor`
  // is the block to branch to if the suspend check is not needed, and after
  // the suspend call.
  void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
  void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg);
  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp);
  void HandleBitwiseOperation(HBinaryOperation* operation);
  void GenerateRemFP(HRem* rem);
  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
  void DivByPowerOfTwo(HDiv* instruction);
  void RemByPowerOfTwo(HRem* instruction);
  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
  void GenerateDivRemIntegral(HBinaryOperation* instruction);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* operation);
  void HandleRotate(HBinaryOperation* rotate);

  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      bool value_can_be_null,
                      WriteBarrierKind write_barrier_kind);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);

  void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
  void GenerateMethodEntryExitHook(HInstruction* instruction);

  // Generate a heap reference load using one register `out`:
  //
  //   out <- *(out + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a read barrier and
  // shall be a register in that case; it may be an invalid location
  // otherwise.
  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                        Location out,
                                        uint32_t offset,
                                        Location maybe_temp,
                                        ReadBarrierOption read_barrier_option);
  // Generate a heap reference load using two different registers
  // `out` and `obj`:
  //
  //   out <- *(obj + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a Baker's (fast
  // path) read barrier and shall be a register in that case; it may
  // be an invalid location otherwise.
  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                         Location out,
                                         Location obj,
                                         uint32_t offset,
                                         ReadBarrierOption read_barrier_option);

  void PushOntoFPStack(Location source, uint32_t temp_offset,
                       uint32_t stack_adjustment, bool is_float);
  void GenerateCompareTest(HCondition* condition);
  template<class LabelType>
  void GenerateTestAndBranch(HInstruction* instruction,
                             size_t condition_input_index,
                             LabelType* true_target,
                             LabelType* false_target);
  template<class LabelType>
  void GenerateCompareTestAndBranch(HCondition* condition,
                                    LabelType* true_target,
                                    LabelType* false_target);
  template<class LabelType>
  void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label);

  void HandleGoto(HInstruction* got, HBasicBlock* successor);

  bool CpuHasAvxFeatureFlag();
  bool CpuHasAvx2FeatureFlag();

  X86_64Assembler* const assembler_;
  CodeGeneratorX86_64* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64);
};

// Class for fixups to jump tables.
class JumpTableRIPFixup;

class CodeGeneratorX86_64 : public CodeGenerator {
 public:
  CodeGeneratorX86_64(HGraph* graph,
                      const CompilerOptions& compiler_options,
                      OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGeneratorX86_64() {}

  void GenerateFrameEntry() override;
  void GenerateFrameExit() override;
  void Bind(HBasicBlock* block) override;
  void MoveConstant(Location destination, int32_t value) override;
  void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
  void AddLocationAsTemp(Location location, LocationSummary* locations) override;

  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;

  // Generate code to invoke a runtime entry point.
  void InvokeRuntime(QuickEntrypointEnum entrypoint,
                     HInstruction* instruction,
                     SlowPathCode* slow_path = nullptr) override;

  // Generate code to invoke a runtime entry point, but do not record
  // PC-related information in a stack map.
  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                           HInstruction* instruction,
                                           SlowPathCode* slow_path);

  void GenerateInvokeRuntime(int32_t entry_point_offset);

  size_t GetWordSize() const override {
    return kX86_64WordSize;
  }

  size_t GetSlowPathFPWidth() const override {
    return GetGraph()->HasSIMD()
        ? GetSIMDRegisterWidth()
        : 1 * kX86_64WordSize;  // 8 bytes == 1 x86_64 word for each spill
  }

  size_t GetCalleePreservedFPWidth() const override {
    return 1 * kX86_64WordSize;
  }

  size_t GetSIMDRegisterWidth() const override {
    return 2 * kX86_64WordSize;
  }

  HGraphVisitor* GetLocationBuilder() override {
    return &location_builder_;
  }

  HGraphVisitor* GetInstructionVisitor() override {
    return &instruction_visitor_;
  }

  X86_64Assembler* GetAssembler() override {
    return &assembler_;
  }

  const X86_64Assembler& GetAssembler() const override {
    return assembler_;
  }

  ParallelMoveResolverX86_64* GetMoveResolver() override {
    return &move_resolver_;
  }

  uintptr_t GetAddressOf(HBasicBlock* block) override {
    return GetLabelOf(block)->Position();
  }

  void SetupBlockedRegisters() const override;
  void DumpCoreRegister(std::ostream& stream, int reg) const override;
  void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;
  void Finalize() override;

  InstructionSet GetInstructionSet() const override {
    return InstructionSet::kX86_64;
  }

  InstructionCodeGeneratorX86_64* GetInstructionCodegen() {
    return down_cast<InstructionCodeGeneratorX86_64*>(GetInstructionVisitor());
  }

  const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const;

  // Emit a write barrier if:
  // A) emit_null_check is false
  // B) emit_null_check is true, and value is not null.
  void MaybeMarkGCCard(CpuRegister temp,
                       CpuRegister card,
                       CpuRegister object,
                       CpuRegister value,
                       bool emit_null_check);

  // Emit a write barrier unconditionally.
  void MarkGCCard(CpuRegister temp, CpuRegister card, CpuRegister object);

  // Crash if the card table is not valid. This check is only emitted for the CC GC. We assert
  // `(!clean || !self->is_gc_marking)`, since the card table should not be set to clean when the CC
  // GC is marking for eliminated write barriers.
  void CheckGCCardIsValid(CpuRegister temp, CpuRegister card, CpuRegister object);

  void GenerateMemoryBarrier(MemBarrierKind kind);

  // Helper method to move a value between two locations.
  void Move(Location destination, Location source);
  // Helper method to load a value of non-reference type from memory.
  void LoadFromMemoryNoReference(DataType::Type type, Location dst, Address src);

  Label* GetLabelOf(HBasicBlock* block) const {
    return CommonGetLabelOf<Label>(block_labels_, block);
  }

  void Initialize() override {
    block_labels_ = CommonInitializeLabels<Label>();
  }

  bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; }

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) override;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) override;

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method) override;

  void LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke);
  void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
  void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;

  void RecordBootImageIntrinsicPatch(uint32_t intrinsic_data);
  void RecordBootImageRelRoPatch(uint32_t boot_image_offset);
  void RecordBootImageMethodPatch(HInvoke* invoke);
  void RecordAppImageMethodPatch(HInvoke* invoke);
  void RecordMethodBssEntryPatch(HInvoke* invoke);
  void RecordBootImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
  void RecordAppImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
  Label* NewTypeBssEntryPatch(HLoadClass* load_class);
  void RecordBootImageStringPatch(HLoadString* load_string);
  Label* NewStringBssEntryPatch(HLoadString* load_string);
  Label* NewMethodTypeBssEntryPatch(HLoadMethodType* load_method_type);
  void RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke);
  Label* NewJitRootStringPatch(const DexFile& dex_file,
                               dex::StringIndex string_index,
                               Handle<mirror::String> handle);
  Label* NewJitRootClassPatch(const DexFile& dex_file,
                              dex::TypeIndex type_index,
                              Handle<mirror::Class> handle);
  Label* NewJitRootMethodTypePatch(const DexFile& dex_file,
                                   dex::ProtoIndex proto_index,
                                   Handle<mirror::MethodType> method_type);

  void LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference);
  void LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke);
  void LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root);

  void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;

  void PatchJitRootUse(uint8_t* code,
                       const uint8_t* roots_data,
                       const PatchInfo<Label>& info,
                       uint64_t index_in_table) const;

  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;

  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference field load when Baker's read barriers are used.
  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             CpuRegister obj,
                                             uint32_t offset,
                                             bool needs_null_check);
  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference array load when Baker's read barriers are used.
  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             CpuRegister obj,
                                             uint32_t data_offset,
                                             Location index,
                                             bool needs_null_check);
  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
  //
  // Load the object reference located at address `src`, held by
  // object `obj`, into `ref`, and mark it if needed.  The base of
  // address `src` must be `obj`.
  //
  // If `always_update_field` is true, the value of the reference is
  // atomically updated in the holder (`obj`).  This operation
  // requires two temporary registers, which must be provided as
  // non-null pointers (`temp1` and `temp2`).
  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                 Location ref,
                                                 CpuRegister obj,
                                                 const Address& src,
                                                 bool needs_null_check,
                                                 bool always_update_field = false,
                                                 CpuRegister* temp1 = nullptr,
                                                 CpuRegister* temp2 = nullptr);

  // Generate a read barrier for a heap reference within `instruction`
  // using a slow path.
  //
  // A read barrier for an object reference read from the heap is
  // implemented as a call to the artReadBarrierSlow runtime entry
  // point, which is passed the values in locations `ref`, `obj`, and
  // `offset`:
  //
  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
  //                                      mirror::Object* obj,
  //                                      uint32_t offset);
  //
  // The `out` location contains the value returned by
  // artReadBarrierSlow.
  //
  // When `index` is provided (i.e., when it is different from
  // Location::NoLocation()), the offset value passed to
  // artReadBarrierSlow is adjusted to take `index` into account.
  void GenerateReadBarrierSlow(HInstruction* instruction,
                               Location out,
                               Location ref,
                               Location obj,
                               uint32_t offset,
                               Location index = Location::NoLocation());

  // If read barriers are enabled, generate a read barrier for a heap
  // reference using a slow path. If heap poisoning is enabled, also
  // unpoison the reference in `out`.
  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                    Location out,
                                    Location ref,
                                    Location obj,
                                    uint32_t offset,
                                    Location index = Location::NoLocation());

  // Generate a read barrier for a GC root within `instruction` using
  // a slow path.
  //
  // A read barrier for an object reference GC root is implemented as
  // a call to the artReadBarrierForRootSlow runtime entry point,
  // which is passed the value in location `root`:
  //
  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
  //
  // The `out` location contains the value returned by
  // artReadBarrierForRootSlow.
  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);

  int ConstantAreaStart() const {
    return constant_area_start_;
  }

  Address LiteralDoubleAddress(double v);
  Address LiteralFloatAddress(float v);
  Address LiteralInt32Address(int32_t v);
  Address LiteralInt64Address(int64_t v);

  // Load a 32/64-bit value into a register in the most efficient manner.
  void Load32BitValue(CpuRegister dest, int32_t value);
  void Load64BitValue(CpuRegister dest, int64_t value);
  void Load32BitValue(XmmRegister dest, int32_t value);
  void Load64BitValue(XmmRegister dest, int64_t value);
  void Load32BitValue(XmmRegister dest, float value);
  void Load64BitValue(XmmRegister dest, double value);

  // Compare a register with a 32/64-bit value in the most efficient manner.
  void Compare32BitValue(CpuRegister dest, int32_t value);
  void Compare64BitValue(CpuRegister dest, int64_t value);

  // Compare int values. Supports register locations for `lhs`.
  void GenerateIntCompare(Location lhs, Location rhs);
  void GenerateIntCompare(CpuRegister lhs, Location rhs);

  // Compare long values. Supports only register locations for `lhs`.
  void GenerateLongCompare(Location lhs, Location rhs);

  // Construct address for array access.
  static Address ArrayAddress(CpuRegister obj,
                              Location index,
                              ScaleFactor scale,
                              uint32_t data_offset);

  Address LiteralCaseTable(HPackedSwitch* switch_instr);

  // Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
  void Store64BitValueToStack(Location dest, int64_t value);

  void MoveFromReturnRegister(Location trg, DataType::Type type) override;

  // Assign a 64 bit constant to an address.
  void MoveInt64ToAddress(const Address& addr_low,
                          const Address& addr_high,
                          int64_t v,
                          HInstruction* instruction);

  // Ensure that prior stores complete to memory before subsequent loads.
  // The locked add implementation will avoid serializing device memory, but will
  // touch (but not change) the top of the stack.
  // The 'force_mfence' parameter should be used to ensure ordering of non-temporal stores.
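  //
  // A minimal illustrative sketch of a call site (the real uses live in the instruction
  // visitors and intrinsics): after a volatile store the generated code needs a StoreLoad
  // barrier, which a caller would emit as
  //
  //   __ movl(Address(base, offset), value);
  //   codegen->MemoryFence();  // lock addl $0, 0(%rsp), or mfence if forced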
  void MemoryFence(bool force_mfence = false) {
    if (!force_mfence) {
      assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));
    } else {
      assembler_.mfence();
    }
  }

  void IncreaseFrame(size_t adjustment) override;
  void DecreaseFrame(size_t adjustment) override;

  void GenerateNop() override;
  void GenerateImplicitNullCheck(HNullCheck* instruction) override;
  void GenerateExplicitNullCheck(HNullCheck* instruction) override;
  void MaybeGenerateInlineCacheCheck(HInstruction* instruction, CpuRegister cls);

  void MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry);

  static void BlockNonVolatileXmmRegisters(LocationSummary* locations);

  // When we don't know the proper offset for the value, we use kPlaceholder32BitOffset.
  // We will fix this up in the linker later to have the right value.
  static constexpr int32_t kPlaceholder32BitOffset = 256;

 private:
  template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
  static void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos,
                                          ArenaVector<linker::LinkerPatch>* linker_patches);

  // Labels for each block that will be compiled.
  Label* block_labels_;  // Indexed by block id.
  Label frame_entry_label_;
  LocationsBuilderX86_64 location_builder_;
  InstructionCodeGeneratorX86_64 instruction_visitor_;
  ParallelMoveResolverX86_64 move_resolver_;
  X86_64Assembler assembler_;

  // Offset to the start of the constant area in the assembled code.
  // Used for fixups to the constant area.
  int constant_area_start_;

  // PC-relative method patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_method_patches_;
  // PC-relative method patch info for kAppImageRelRo.
  ArenaDeque<PatchInfo<Label>> app_image_method_patches_;
  // PC-relative method patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_;
  // PC-relative type patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_type_patches_;
  // PC-relative type patch info for kAppImageRelRo.
  ArenaDeque<PatchInfo<Label>> app_image_type_patches_;
  // PC-relative type patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_;
  // PC-relative public type patch info for kBssEntryPublic.
  ArenaDeque<PatchInfo<Label>> public_type_bss_entry_patches_;
  // PC-relative package type patch info for kBssEntryPackage.
  ArenaDeque<PatchInfo<Label>> package_type_bss_entry_patches_;
  // PC-relative String patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<PatchInfo<Label>> boot_image_string_patches_;
  // PC-relative String patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_;
  // PC-relative MethodType patch info for kBssEntry.
  ArenaDeque<PatchInfo<Label>> method_type_bss_entry_patches_;
  // PC-relative method patch info for kBootImageLinkTimePcRelative+kCallCriticalNative.
  ArenaDeque<PatchInfo<Label>> boot_image_jni_entrypoint_patches_;
  // PC-relative patch info for IntrinsicObjects for the boot image,
  // and for method/type/string patches for kBootImageRelRo otherwise.
  ArenaDeque<PatchInfo<Label>> boot_image_other_patches_;

  // Patches for string literals in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_string_patches_;
  // Patches for class literals in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_class_patches_;
  // Patches for method type in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_method_type_patches_;

  // Fixups for jump tables need to be handled specially.
  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;

  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
};

}  // namespace x86_64
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_