/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
18 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
19 
20 #include "arch/x86/instruction_set_features_x86.h"
21 #include "base/enums.h"
22 #include "code_generator.h"
23 #include "dex_file_types.h"
24 #include "driver/compiler_options.h"
25 #include "nodes.h"
26 #include "parallel_move_resolver.h"
27 #include "utils/x86/assembler_x86.h"
28 
29 namespace art {
30 namespace x86 {
31 
32 // Use a local definition to prevent copying mistakes.
33 static constexpr size_t kX86WordSize = static_cast<size_t>(kX86PointerSize);
34 
35 class CodeGeneratorX86;
36 
37 static constexpr Register kParameterCoreRegisters[] = { ECX, EDX, EBX };
38 static constexpr RegisterPair kParameterCorePairRegisters[] = { ECX_EDX, EDX_EBX };
39 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
40 static constexpr XmmRegister kParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 };
41 static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters);
42 
43 static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX };
44 static constexpr size_t kRuntimeParameterCoreRegistersLength =
45     arraysize(kRuntimeParameterCoreRegisters);
46 static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 };
47 static constexpr size_t kRuntimeParameterFpuRegistersLength =
48     arraysize(kRuntimeParameterFpuRegisters);
49 
50 class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> {
51  public:
InvokeRuntimeCallingConvention()52   InvokeRuntimeCallingConvention()
53       : CallingConvention(kRuntimeParameterCoreRegisters,
54                           kRuntimeParameterCoreRegistersLength,
55                           kRuntimeParameterFpuRegisters,
56                           kRuntimeParameterFpuRegistersLength,
57                           kX86PointerSize) {}
58 
59  private:
60   DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
61 };
62 
63 class InvokeDexCallingConvention : public CallingConvention<Register, XmmRegister> {
64  public:
InvokeDexCallingConvention()65   InvokeDexCallingConvention() : CallingConvention(
66       kParameterCoreRegisters,
67       kParameterCoreRegistersLength,
68       kParameterFpuRegisters,
69       kParameterFpuRegistersLength,
70       kX86PointerSize) {}
71 
GetRegisterPairAt(size_t argument_index)72   RegisterPair GetRegisterPairAt(size_t argument_index) {
73     DCHECK_LT(argument_index + 1, GetNumberOfRegisters());
74     return kParameterCorePairRegisters[argument_index];
75   }
76 
77  private:
78   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
79 };
80 
81 class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor {
82  public:
InvokeDexCallingConventionVisitorX86()83   InvokeDexCallingConventionVisitorX86() {}
~InvokeDexCallingConventionVisitorX86()84   virtual ~InvokeDexCallingConventionVisitorX86() {}
85 
86   Location GetNextLocation(Primitive::Type type) OVERRIDE;
87   Location GetReturnLocation(Primitive::Type type) const OVERRIDE;
88   Location GetMethodLocation() const OVERRIDE;
89 
90  private:
91   InvokeDexCallingConvention calling_convention;
92 
93   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86);
94 };
95 
96 class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention {
97  public:
FieldAccessCallingConventionX86()98   FieldAccessCallingConventionX86() {}
99 
GetObjectLocation()100   Location GetObjectLocation() const OVERRIDE {
101     return Location::RegisterLocation(ECX);
102   }
GetFieldIndexLocation()103   Location GetFieldIndexLocation() const OVERRIDE {
104     return Location::RegisterLocation(EAX);
105   }
GetReturnLocation(Primitive::Type type)106   Location GetReturnLocation(Primitive::Type type) const OVERRIDE {
107     return Primitive::Is64BitType(type)
108         ? Location::RegisterPairLocation(EAX, EDX)
109         : Location::RegisterLocation(EAX);
110   }
GetSetValueLocation(Primitive::Type type,bool is_instance)111   Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
112     return Primitive::Is64BitType(type)
113         ? (is_instance
114             ? Location::RegisterPairLocation(EDX, EBX)
115             : Location::RegisterPairLocation(ECX, EDX))
116         : (is_instance
117             ? Location::RegisterLocation(EDX)
118             : Location::RegisterLocation(ECX));
119   }
GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED)120   Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
121     return Location::FpuRegisterLocation(XMM0);
122   }
123 
124  private:
125   DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86);
126 };
127 
128 class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap {
129  public:
ParallelMoveResolverX86(ArenaAllocator * allocator,CodeGeneratorX86 * codegen)130   ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen)
131       : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}
132 
133   void EmitMove(size_t index) OVERRIDE;
134   void EmitSwap(size_t index) OVERRIDE;
135   void SpillScratch(int reg) OVERRIDE;
136   void RestoreScratch(int reg) OVERRIDE;
137 
138   X86Assembler* GetAssembler() const;
139 
140  private:
141   void Exchange(Register reg, int mem);
142   void Exchange(int mem1, int mem2);
143   void Exchange32(XmmRegister reg, int mem);
144   void MoveMemoryToMemory32(int dst, int src);
145   void MoveMemoryToMemory64(int dst, int src);
146 
147   CodeGeneratorX86* const codegen_;
148 
149   DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86);
150 };
151 
152 class LocationsBuilderX86 : public HGraphVisitor {
153  public:
LocationsBuilderX86(HGraph * graph,CodeGeneratorX86 * codegen)154   LocationsBuilderX86(HGraph* graph, CodeGeneratorX86* codegen)
155       : HGraphVisitor(graph), codegen_(codegen) {}
156 
157 #define DECLARE_VISIT_INSTRUCTION(name, super)     \
158   void Visit##name(H##name* instr) OVERRIDE;
159 
160   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION)161   FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION)
162 
163 #undef DECLARE_VISIT_INSTRUCTION
164 
165   void VisitInstruction(HInstruction* instruction) OVERRIDE {
166     LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
167                << " (id " << instruction->GetId() << ")";
168   }
169 
170  private:
171   void HandleBitwiseOperation(HBinaryOperation* instruction);
172   void HandleInvoke(HInvoke* invoke);
173   void HandleCondition(HCondition* condition);
174   void HandleShift(HBinaryOperation* instruction);
175   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
176   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
177 
178   CodeGeneratorX86* const codegen_;
179   InvokeDexCallingConventionVisitorX86 parameter_visitor_;
180 
181   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86);
182 };
183 
184 class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
185  public:
186   InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen);
187 
188 #define DECLARE_VISIT_INSTRUCTION(name, super)     \
189   void Visit##name(H##name* instr) OVERRIDE;
190 
191   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION)192   FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION)
193 
194 #undef DECLARE_VISIT_INSTRUCTION
195 
196   void VisitInstruction(HInstruction* instruction) OVERRIDE {
197     LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
198                << " (id " << instruction->GetId() << ")";
199   }
200 
GetAssembler()201   X86Assembler* GetAssembler() const { return assembler_; }
202 
203   // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
204   // table version generates 7 instructions and num_entries literals. Compare/jump sequence will
205   // generates less code/data with a small num_entries.
206   static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
207 
208  private:
209   // Generate code for the given suspend check. If not null, `successor`
210   // is the block to branch to if the suspend check is not needed, and after
211   // the suspend call.
212   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
213   void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg);
214   void HandleBitwiseOperation(HBinaryOperation* instruction);
215   void GenerateDivRemIntegral(HBinaryOperation* instruction);
216   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
217   void DivByPowerOfTwo(HDiv* instruction);
218   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
219   void GenerateRemFP(HRem* rem);
220   void HandleCondition(HCondition* condition);
221   void HandleShift(HBinaryOperation* instruction);
222   void GenerateShlLong(const Location& loc, Register shifter);
223   void GenerateShrLong(const Location& loc, Register shifter);
224   void GenerateUShrLong(const Location& loc, Register shifter);
225   void GenerateShlLong(const Location& loc, int shift);
226   void GenerateShrLong(const Location& loc, int shift);
227   void GenerateUShrLong(const Location& loc, int shift);
228 
229   void HandleFieldSet(HInstruction* instruction,
230                       const FieldInfo& field_info,
231                       bool value_can_be_null);
232   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
233 
234   // Generate a heap reference load using one register `out`:
235   //
236   //   out <- *(out + offset)
237   //
238   // while honoring heap poisoning and/or read barriers (if any).
239   //
240   // Location `maybe_temp` is used when generating a read barrier and
241   // shall be a register in that case; it may be an invalid location
242   // otherwise.
243   void GenerateReferenceLoadOneRegister(HInstruction* instruction,
244                                         Location out,
245                                         uint32_t offset,
246                                         Location maybe_temp,
247                                         ReadBarrierOption read_barrier_option);
248   // Generate a heap reference load using two different registers
249   // `out` and `obj`:
250   //
251   //   out <- *(obj + offset)
252   //
253   // while honoring heap poisoning and/or read barriers (if any).
254   //
255   // Location `maybe_temp` is used when generating a Baker's (fast
256   // path) read barrier and shall be a register in that case; it may
257   // be an invalid location otherwise.
258   void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
259                                          Location out,
260                                          Location obj,
261                                          uint32_t offset,
262                                          ReadBarrierOption read_barrier_option);
263   // Generate a GC root reference load:
264   //
265   //   root <- *address
266   //
267   // while honoring read barriers based on read_barrier_option.
268   void GenerateGcRootFieldLoad(HInstruction* instruction,
269                                Location root,
270                                const Address& address,
271                                Label* fixup_label,
272                                ReadBarrierOption read_barrier_option);
273 
274   // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
275   // `is_wide` specifies whether it is long/double or not.
276   void PushOntoFPStack(Location source, uint32_t temp_offset,
277                        uint32_t stack_adjustment, bool is_fp, bool is_wide);
278 
279   template<class LabelType>
280   void GenerateTestAndBranch(HInstruction* instruction,
281                              size_t condition_input_index,
282                              LabelType* true_target,
283                              LabelType* false_target);
284   template<class LabelType>
285   void GenerateCompareTestAndBranch(HCondition* condition,
286                                     LabelType* true_target,
287                                     LabelType* false_target);
288   template<class LabelType>
289   void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label);
290   template<class LabelType>
291   void GenerateLongComparesAndJumps(HCondition* cond,
292                                     LabelType* true_label,
293                                     LabelType* false_label);
294 
295   void HandleGoto(HInstruction* got, HBasicBlock* successor);
296   void GenPackedSwitchWithCompares(Register value_reg,
297                                    int32_t lower_bound,
298                                    uint32_t num_entries,
299                                    HBasicBlock* switch_block,
300                                    HBasicBlock* default_block);
301 
302   void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);
303 
304   X86Assembler* const assembler_;
305   CodeGeneratorX86* const codegen_;
306 
307   DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86);
308 };
309 
310 class JumpTableRIPFixup;
311 
312 class CodeGeneratorX86 : public CodeGenerator {
313  public:
314   CodeGeneratorX86(HGraph* graph,
315                    const X86InstructionSetFeatures& isa_features,
316                    const CompilerOptions& compiler_options,
317                    OptimizingCompilerStats* stats = nullptr);
~CodeGeneratorX86()318   virtual ~CodeGeneratorX86() {}
319 
320   void GenerateFrameEntry() OVERRIDE;
321   void GenerateFrameExit() OVERRIDE;
322   void Bind(HBasicBlock* block) OVERRIDE;
323   void MoveConstant(Location destination, int32_t value) OVERRIDE;
324   void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
325   void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
326 
327   size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
328   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
329   size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
330   size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
331 
332   // Generate code to invoke a runtime entry point.
333   void InvokeRuntime(QuickEntrypointEnum entrypoint,
334                      HInstruction* instruction,
335                      uint32_t dex_pc,
336                      SlowPathCode* slow_path = nullptr) OVERRIDE;
337 
338   // Generate code to invoke a runtime entry point, but do not record
339   // PC-related information in a stack map.
340   void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
341                                            HInstruction* instruction,
342                                            SlowPathCode* slow_path);
343 
344   void GenerateInvokeRuntime(int32_t entry_point_offset);
345 
GetWordSize()346   size_t GetWordSize() const OVERRIDE {
347     return kX86WordSize;
348   }
349 
GetFloatingPointSpillSlotSize()350   size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
351     return GetGraph()->HasSIMD()
352         ? 4 * kX86WordSize   // 16 bytes == 4 words for each spill
353         : 2 * kX86WordSize;  //  8 bytes == 2 words for each spill
354   }
355 
GetLocationBuilder()356   HGraphVisitor* GetLocationBuilder() OVERRIDE {
357     return &location_builder_;
358   }
359 
GetInstructionVisitor()360   HGraphVisitor* GetInstructionVisitor() OVERRIDE {
361     return &instruction_visitor_;
362   }
363 
GetAssembler()364   X86Assembler* GetAssembler() OVERRIDE {
365     return &assembler_;
366   }
367 
GetAssembler()368   const X86Assembler& GetAssembler() const OVERRIDE {
369     return assembler_;
370   }
371 
GetAddressOf(HBasicBlock * block)372   uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
373     return GetLabelOf(block)->Position();
374   }
375 
376   void SetupBlockedRegisters() const OVERRIDE;
377 
378   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
379   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
380 
GetMoveResolver()381   ParallelMoveResolverX86* GetMoveResolver() OVERRIDE {
382     return &move_resolver_;
383   }
384 
GetInstructionSet()385   InstructionSet GetInstructionSet() const OVERRIDE {
386     return InstructionSet::kX86;
387   }
388 
389   // Helper method to move a 32bits value between two locations.
390   void Move32(Location destination, Location source);
391   // Helper method to move a 64bits value between two locations.
392   void Move64(Location destination, Location source);
393 
394   // Check if the desired_string_load_kind is supported. If it is, return it,
395   // otherwise return a fall-back kind that should be used instead.
396   HLoadString::LoadKind GetSupportedLoadStringKind(
397       HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
398 
399   // Check if the desired_class_load_kind is supported. If it is, return it,
400   // otherwise return a fall-back kind that should be used instead.
401   HLoadClass::LoadKind GetSupportedLoadClassKind(
402       HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
403 
404   // Check if the desired_dispatch_info is supported. If it is, return it,
405   // otherwise return a fall-back info that should be used instead.
406   HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
407       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
408       HInvokeStaticOrDirect* invoke) OVERRIDE;
409 
410   // Generate a call to a static or direct method.
411   void GenerateStaticOrDirectCall(
412       HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
413   // Generate a call to a virtual method.
414   void GenerateVirtualCall(
415       HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
416 
417   void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke);
418   Label* NewMethodBssEntryPatch(HX86ComputeBaseMethodAddress* method_address,
419                                 MethodReference target_method);
420   void RecordBootTypePatch(HLoadClass* load_class);
421   Label* NewTypeBssEntryPatch(HLoadClass* load_class);
422   void RecordBootStringPatch(HLoadString* load_string);
423   Label* NewStringBssEntryPatch(HLoadString* load_string);
424   Label* NewJitRootStringPatch(const DexFile& dex_file,
425                                dex::StringIndex dex_index,
426                                Handle<mirror::String> handle);
427   Label* NewJitRootClassPatch(const DexFile& dex_file,
428                               dex::TypeIndex dex_index,
429                               Handle<mirror::Class> handle);
430 
431   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
432 
433   // Emit linker patches.
434   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
435 
436   void PatchJitRootUse(uint8_t* code,
437                        const uint8_t* roots_data,
438                        const PatchInfo<Label>& info,
439                        uint64_t index_in_table) const;
440   void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
441 
442   // Emit a write barrier.
443   void MarkGCCard(Register temp,
444                   Register card,
445                   Register object,
446                   Register value,
447                   bool value_can_be_null);
448 
449   void GenerateMemoryBarrier(MemBarrierKind kind);
450 
GetLabelOf(HBasicBlock * block)451   Label* GetLabelOf(HBasicBlock* block) const {
452     return CommonGetLabelOf<Label>(block_labels_, block);
453   }
454 
Initialize()455   void Initialize() OVERRIDE {
456     block_labels_ = CommonInitializeLabels<Label>();
457   }
458 
NeedsTwoRegisters(Primitive::Type type)459   bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
460     return type == Primitive::kPrimLong;
461   }
462 
ShouldSplitLongMoves()463   bool ShouldSplitLongMoves() const OVERRIDE { return true; }
464 
GetFrameEntryLabel()465   Label* GetFrameEntryLabel() { return &frame_entry_label_; }
466 
GetInstructionSetFeatures()467   const X86InstructionSetFeatures& GetInstructionSetFeatures() const {
468     return isa_features_;
469   }
470 
AddMethodAddressOffset(HX86ComputeBaseMethodAddress * method_base,int32_t offset)471   void AddMethodAddressOffset(HX86ComputeBaseMethodAddress* method_base, int32_t offset) {
472     method_address_offset_.Put(method_base->GetId(), offset);
473   }
474 
GetMethodAddressOffset(HX86ComputeBaseMethodAddress * method_base)475   int32_t GetMethodAddressOffset(HX86ComputeBaseMethodAddress* method_base) const {
476     return method_address_offset_.Get(method_base->GetId());
477   }
478 
ConstantAreaStart()479   int32_t ConstantAreaStart() const {
480     return constant_area_start_;
481   }
482 
483   Address LiteralDoubleAddress(double v, HX86ComputeBaseMethodAddress* method_base, Register reg);
484   Address LiteralFloatAddress(float v, HX86ComputeBaseMethodAddress* method_base, Register reg);
485   Address LiteralInt32Address(int32_t v, HX86ComputeBaseMethodAddress* method_base, Register reg);
486   Address LiteralInt64Address(int64_t v, HX86ComputeBaseMethodAddress* method_base, Register reg);
487 
488   // Load a 32-bit value into a register in the most efficient manner.
489   void Load32BitValue(Register dest, int32_t value);
490 
491   // Compare a register with a 32-bit value in the most efficient manner.
492   void Compare32BitValue(Register dest, int32_t value);
493 
494   // Compare int values. Supports only register locations for `lhs`.
495   void GenerateIntCompare(Location lhs, Location rhs);
496   void GenerateIntCompare(Register lhs, Location rhs);
497 
498   // Construct address for array access.
499   static Address ArrayAddress(Register obj,
500                               Location index,
501                               ScaleFactor scale,
502                               uint32_t data_offset);
503 
504   Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value);
505 
506   void Finalize(CodeAllocator* allocator) OVERRIDE;
507 
508   // Fast path implementation of ReadBarrier::Barrier for a heap
509   // reference field load when Baker's read barriers are used.
510   void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
511                                              Location ref,
512                                              Register obj,
513                                              uint32_t offset,
514                                              bool needs_null_check);
515   // Fast path implementation of ReadBarrier::Barrier for a heap
516   // reference array load when Baker's read barriers are used.
517   void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
518                                              Location ref,
519                                              Register obj,
520                                              uint32_t data_offset,
521                                              Location index,
522                                              bool needs_null_check);
523   // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
524   // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
525   //
526   // Load the object reference located at address `src`, held by
527   // object `obj`, into `ref`, and mark it if needed.  The base of
528   // address `src` must be `obj`.
529   //
530   // If `always_update_field` is true, the value of the reference is
531   // atomically updated in the holder (`obj`).  This operation
532   // requires a temporary register, which must be provided as a
533   // non-null pointer (`temp`).
534   void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
535                                                  Location ref,
536                                                  Register obj,
537                                                  const Address& src,
538                                                  bool needs_null_check,
539                                                  bool always_update_field = false,
540                                                  Register* temp = nullptr);
541 
542   // Generate a read barrier for a heap reference within `instruction`
543   // using a slow path.
544   //
545   // A read barrier for an object reference read from the heap is
546   // implemented as a call to the artReadBarrierSlow runtime entry
547   // point, which is passed the values in locations `ref`, `obj`, and
548   // `offset`:
549   //
550   //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
551   //                                      mirror::Object* obj,
552   //                                      uint32_t offset);
553   //
554   // The `out` location contains the value returned by
555   // artReadBarrierSlow.
556   //
557   // When `index` is provided (i.e. for array accesses), the offset
558   // value passed to artReadBarrierSlow is adjusted to take `index`
559   // into account.
560   void GenerateReadBarrierSlow(HInstruction* instruction,
561                                Location out,
562                                Location ref,
563                                Location obj,
564                                uint32_t offset,
565                                Location index = Location::NoLocation());
566 
567   // If read barriers are enabled, generate a read barrier for a heap
568   // reference using a slow path. If heap poisoning is enabled, also
569   // unpoison the reference in `out`.
570   void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
571                                     Location out,
572                                     Location ref,
573                                     Location obj,
574                                     uint32_t offset,
575                                     Location index = Location::NoLocation());
576 
577   // Generate a read barrier for a GC root within `instruction` using
578   // a slow path.
579   //
580   // A read barrier for an object reference GC root is implemented as
581   // a call to the artReadBarrierForRootSlow runtime entry point,
582   // which is passed the value in location `root`:
583   //
584   //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
585   //
586   // The `out` location contains the value returned by
587   // artReadBarrierForRootSlow.
588   void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
589 
590   // Ensure that prior stores complete to memory before subsequent loads.
591   // The locked add implementation will avoid serializing device memory, but will
592   // touch (but not change) the top of the stack.
593   // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores.
594   void MemoryFence(bool non_temporal = false) {
595     if (!non_temporal) {
596       assembler_.lock()->addl(Address(ESP, 0), Immediate(0));
597     } else {
598       assembler_.mfence();
599     }
600   }
601 
602   void GenerateNop() OVERRIDE;
603   void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
604   void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
605 
606   // When we don't know the proper offset for the value, we use kDummy32BitOffset.
607   // The correct value will be inserted when processing Assembler fixups.
608   static constexpr int32_t kDummy32BitOffset = 256;
609 
610  private:
611   struct X86PcRelativePatchInfo : PatchInfo<Label> {
X86PcRelativePatchInfoX86PcRelativePatchInfo612     X86PcRelativePatchInfo(HX86ComputeBaseMethodAddress* address,
613                            const DexFile& target_dex_file,
614                            uint32_t target_index)
615         : PatchInfo(target_dex_file, target_index),
616           method_address(address) {}
617     HX86ComputeBaseMethodAddress* method_address;
618   };
619 
620   template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
621   void EmitPcRelativeLinkerPatches(const ArenaDeque<X86PcRelativePatchInfo>& infos,
622                                    ArenaVector<LinkerPatch>* linker_patches);
623 
624   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
625 
626   // Labels for each block that will be compiled.
627   Label* block_labels_;  // Indexed by block id.
628   Label frame_entry_label_;
629   LocationsBuilderX86 location_builder_;
630   InstructionCodeGeneratorX86 instruction_visitor_;
631   ParallelMoveResolverX86 move_resolver_;
632   X86Assembler assembler_;
633   const X86InstructionSetFeatures& isa_features_;
634 
635   // PC-relative method patch info for kBootImageLinkTimePcRelative.
636   ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_;
637   // PC-relative method patch info for kBssEntry.
638   ArenaDeque<X86PcRelativePatchInfo> method_bss_entry_patches_;
639   // PC-relative type patch info for kBootImageLinkTimePcRelative.
640   ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_;
641   // Type patch locations for kBssEntry.
642   ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_;
643   // String patch locations; type depends on configuration (app .bss or boot image).
644   ArenaDeque<X86PcRelativePatchInfo> string_patches_;
645 
646   // Patches for string root accesses in JIT compiled code.
647   ArenaDeque<PatchInfo<Label>> jit_string_patches_;
648   // Patches for class root accesses in JIT compiled code.
649   ArenaDeque<PatchInfo<Label>> jit_class_patches_;
650 
651   // Offset to the start of the constant area in the assembled code.
652   // Used for fixups to the constant area.
653   int32_t constant_area_start_;
654 
655   // Fixups for jump tables that need to be patched after the constant table is generated.
656   ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
657 
658   // Maps a HX86ComputeBaseMethodAddress instruction id, to its offset in the
659   // compiled code.
660   ArenaSafeMap<uint32_t, int32_t> method_address_offset_;
661 
662   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86);
663 };
664 
665 }  // namespace x86
666 }  // namespace art
667 
668 #endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
669