/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_x86.h"

#include "arch/x86/jni_frame_x86.h"
#include "art_method-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "interpreter/mterp/nterp.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "intrinsics_x86.h"
#include "jit/profiling_info.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/var_handle.h"
#include "optimizing/nodes.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/managed_register_x86.h"

namespace art HIDDEN {

template<class MirrorType>
class GcRoot;

namespace x86 {

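// Note: the ArtMethod* of the current method is spilled at the bottom of the frame
// (offset 0 from ESP) and is passed in EAX on entry.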
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = EAX;
static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };

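// Mask for the C2 flag (bit 10) of the x87 FPU status word.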
static constexpr int kC2ConditionMask = 0x400;

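// Pseudo-register number just past the physical core registers (EAX..EDI are 0-7),
// used to model the return address slot in the frame (see the constructor, which
// uses it to mimic Quick's frame layout).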
static constexpr int kFakeReturnRegister = Register(8);

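// Canonical quiet NaN bit patterns for double and float.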
static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);

static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
  InvokeRuntimeCallingConvention calling_convention;
  RegisterSet caller_saves = RegisterSet::Empty();
  caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
  // that the kPrimNot result register is the same as the first argument register.
  return caller_saves;
}

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
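// For illustration: with the `__` macro above, slow path code such as
//   __ jmp(GetExitLabel());
// expands to
//   down_cast<X86Assembler*>(codegen->GetAssembler())->jmp(GetExitLabel());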

class NullCheckSlowPathX86 : public SlowPathCode {
 public:
  explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    x86_codegen->InvokeRuntime(kQuickThrowNullPointer,
                               instruction_,
                               instruction_->GetDexPc(),
                               this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "NullCheckSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
};

class DivZeroCheckSlowPathX86 : public SlowPathCode {
 public:
  explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "DivZeroCheckSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
};

class DivRemMinusOneSlowPathX86 : public SlowPathCode {
 public:
  DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
      : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    __ Bind(GetEntryLabel());
    if (is_div_) {
      __ negl(reg_);
    } else {
      __ movl(reg_, Immediate(0));
    }
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86"; }

 private:
  Register reg_;
  bool is_div_;
  DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
};

class BoundsCheckSlowPathX86 : public SlowPathCode {
 public:
  explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, locations);
    }

    Location index_loc = locations->InAt(0);
    Location length_loc = locations->InAt(1);
    InvokeRuntimeCallingConvention calling_convention;
    Location index_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
    Location length_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(1));

    // Are we using an array length from memory?
    if (!length_loc.IsValid()) {
      DCHECK(instruction_->InputAt(1)->IsArrayLength());
      HArrayLength* array_length = instruction_->InputAt(1)->AsArrayLength();
      DCHECK(array_length->IsEmittedAtUseSite());
      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length);
      Location array_loc = array_length->GetLocations()->InAt(0);
      if (!index_loc.Equals(length_arg)) {
        // The index is not clobbered by loading the length directly to `length_arg`.
        __ movl(length_arg.AsRegister<Register>(),
                Address(array_loc.AsRegister<Register>(), len_offset));
        x86_codegen->Move32(index_arg, index_loc);
      } else if (!array_loc.Equals(index_arg)) {
        // The array reference is not clobbered by the index move.
        x86_codegen->Move32(index_arg, index_loc);
        __ movl(length_arg.AsRegister<Register>(),
                Address(array_loc.AsRegister<Register>(), len_offset));
      } else {
        // We do not have a temporary we could use, so swap the registers using the
        // parallel move resolver and replace the array with the length afterwards.
        codegen->EmitParallelMoves(
            index_loc,
            index_arg,
            DataType::Type::kInt32,
            array_loc,
            length_arg,
            DataType::Type::kReference);
        __ movl(length_arg.AsRegister<Register>(),
                Address(length_arg.AsRegister<Register>(), len_offset));
      }
      if (mirror::kUseStringCompression && array_length->IsStringLength()) {
        __ shrl(length_arg.AsRegister<Register>(), Immediate(1));
      }
    } else {
      // We're moving two locations to locations that could overlap,
      // so we need a parallel move resolver.
      codegen->EmitParallelMoves(
          index_loc,
          index_arg,
          DataType::Type::kInt32,
          length_loc,
          length_arg,
          DataType::Type::kInt32);
    }

    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "BoundsCheckSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
};

class SuspendCheckSlowPathX86 : public SlowPathCode {
 public:
  SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCode(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
    x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
    if (successor_ == nullptr) {
      __ jmp(GetReturnLabel());
    } else {
      __ jmp(x86_codegen->GetLabelOf(successor_));
    }
  }

  Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const override { return "SuspendCheckSlowPathX86"; }

 private:
  HBasicBlock* const successor_;
  Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
};

class LoadStringSlowPathX86 : public SlowPathCode {
 public:
  explicit LoadStringSlowPathX86(HLoadString* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_));
    x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
    RestoreLiveRegisters(codegen, locations);

    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadStringSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
};

class LoadClassSlowPathX86 : public SlowPathCode {
 public:
  LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at)
      : SlowPathCode(at), cls_(cls) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();
    const uint32_t dex_pc = instruction_->GetDexPc();
    bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
    bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();

    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    if (must_resolve_type) {
      DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile()) ||
             x86_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
             ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
                             &cls_->GetDexFile()));
      dex::TypeIndex type_index = cls_->GetTypeIndex();
      __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
      if (cls_->NeedsAccessCheck()) {
        CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
        x86_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
      } else {
        CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
        x86_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
      }
      // If we also must_do_clinit, the resolved type is now in the correct register.
    } else {
      DCHECK(must_do_clinit);
      Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
      x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source);
    }
    if (must_do_clinit) {
      x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
    }

    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      x86_codegen->Move32(out, Location::RegisterLocation(EAX));
    }
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadClassSlowPathX86"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
};

class TypeCheckSlowPathX86 : public SlowPathCode {
 public:
  TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
      : SlowPathCode(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());

    if (kPoisonHeapReferences &&
        instruction_->IsCheckCast() &&
        instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
      // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
      __ UnpoisonHeapReference(locations->InAt(1).AsRegister<Register>());
    }

    if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    x86_codegen->EmitParallelMoves(locations->InAt(0),
                                   Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                                   DataType::Type::kReference,
                                   locations->InAt(1),
                                   Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                   DataType::Type::kReference);
    if (instruction_->IsInstanceOf()) {
      x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
    } else {
      DCHECK(instruction_->IsCheckCast());
      x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      if (instruction_->IsInstanceOf()) {
        x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
      }
      RestoreLiveRegisters(codegen, locations);

      __ jmp(GetExitLabel());
    }
  }

  const char* GetDescription() const override { return "TypeCheckSlowPathX86"; }
  bool IsFatal() const override { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
};

class DeoptimizationSlowPathX86 : public SlowPathCode {
 public:
  explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
    : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    x86_codegen->Load32BitValue(
        calling_convention.GetRegisterAt(0),
        static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const override { return "DeoptimizationSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
};

class ArraySetSlowPathX86 : public SlowPathCode {
 public:
  explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        DataType::Type::kReference,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
        DataType::Type::kReference,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ArraySetSlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
};

// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
// reference does not get updated by this slow path after marking (see
// ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierMarkSlowPathX86(HInstruction* instruction,
                             Location ref,
                             bool unpoison_ref_before_marking)
      : SlowPathCode(instruction),
        ref_(ref),
        unpoison_ref_before_marking_(unpoison_ref_before_marking) {
    DCHECK(gUseReadBarrier);
  }

  const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Register ref_reg = ref_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsPredicatedInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsArraySet() ||
           instruction_->IsLoadClass() ||
           instruction_->IsLoadString() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_reg);
    }
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    DCHECK_NE(ref_reg, ESP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in EAX):
    //
    //   EAX <- ref
    //   EAX <- ReadBarrierMark(EAX)
    //   ref <- EAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
};

// Slow path marking an object reference `ref` during a read barrier,
// and if needed, atomically updating the field `obj.field` in the
// object `obj` holding this reference after marking (contrary to
// ReadBarrierMarkSlowPathX86 above, which never tries to update
// `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
                                           Location ref,
                                           Register obj,
                                           const Address& field_addr,
                                           bool unpoison_ref_before_marking,
                                           Register temp)
      : SlowPathCode(instruction),
        ref_(ref),
        obj_(obj),
        field_addr_(field_addr),
        unpoison_ref_before_marking_(unpoison_ref_before_marking),
        temp_(temp) {
    DCHECK(gUseReadBarrier);
  }

  const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Register ref_reg = ref_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    DCHECK((instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking and field updating slow path: "
        << instruction_->DebugName();
    HInvoke* invoke = instruction_->AsInvoke();
    DCHECK(IsUnsafeCASObject(invoke) || IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_reg);
    }

    // Save the old (unpoisoned) reference.
    __ movl(temp_, ref_reg);

    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    DCHECK_NE(ref_reg, ESP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in EAX):
    //
    //   EAX <- ref
    //   EAX <- ReadBarrierMark(EAX)
    //   ref <- EAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);

    // If the new reference is different from the old reference,
    // update the field in the holder (`*field_addr`).
    //
    // Note that this field could also hold a different object, if
    // another thread had concurrently changed it. In that case, the
    // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
    // operation below would abort the CAS, leaving the field as-is.
    NearLabel done;
    __ cmpl(temp_, ref_reg);
    __ j(kEqual, &done);

    // Update the holder's field atomically.  This may fail if the
    // mutator updates it before us, but that is OK.  This is achieved
    // using a strong compare-and-set (CAS) operation with relaxed
    // memory synchronization ordering, where the expected value is
    // the old reference and the desired value is the new reference.
    // This operation is implemented with a 32-bit LOCK CMPXCHG
    // instruction, which requires the expected value (the old
    // reference) to be in EAX.  Save EAX beforehand, and move the
    // expected value (stored in `temp_`) into EAX.
    __ pushl(EAX);
    __ movl(EAX, temp_);

    // Convenience aliases.
    Register base = obj_;
    Register expected = EAX;
    Register value = ref_reg;

    bool base_equals_value = (base == value);
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // If `base` and `value` are the same register location, move
        // `value` to a temporary register.  This way, poisoning
        // `value` won't invalidate `base`.
        value = temp_;
        __ movl(value, base);
      }

      // Check that the register allocator did not assign the location
      // of `expected` (EAX) to `value` nor to `base`, so that heap
      // poisoning (when enabled) works as intended below.
      // - If `value` were equal to `expected`, both references would
      //   be poisoned twice, meaning they would not be poisoned at
      //   all, as heap poisoning uses address negation.
      // - If `base` were equal to `expected`, poisoning `expected`
      //   would invalidate `base`.
      DCHECK_NE(value, expected);
      DCHECK_NE(base, expected);

      __ PoisonHeapReference(expected);
      __ PoisonHeapReference(value);
    }

    __ LockCmpxchgl(field_addr_, value);

    // If heap poisoning is enabled, we need to unpoison the values
    // that were poisoned earlier.
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // `value` has been moved to a temporary register, no need
        // to unpoison it.
      } else {
        __ UnpoisonHeapReference(value);
      }
      // No need to unpoison `expected` (EAX), as it will be overwritten below.
    }

    // Restore EAX.
    __ popl(EAX);

    __ Bind(&done);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // The register containing the object holding the marked object reference field.
  const Register obj_;
  // The address of the marked reference field.  The base of this address must be `obj_`.
  const Address field_addr_;

  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  const Register temp_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
                                         Location out,
                                         Location ref,
                                         Location obj,
                                         uint32_t offset,
                                         Location index)
      : SlowPathCode(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(gUseReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial object
    // has been overwritten by (or after) the heap object reference load
    // to be instrumented, e.g.:
    //
    //   __ movl(out, Address(out, offset));
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    Register reg_out = out_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsPredicatedInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute the actual memory offset and store it in `index`.
        Register index_reg = index_.AsRegister<Register>();
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::x86::X86Assembler::shll and
          // art::x86::X86Assembler::AddImmediate below), but it has
          // not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function.  So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier.  Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen);
          __ movl(free_reg, index_reg);
          index_reg = free_reg;
          index = Location::RegisterLocation(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the scale
        // factor (2) cannot overflow in practice, as the runtime is
        // unable to allocate object arrays with a size larger than
        // 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ shll(index_reg, Immediate(TIMES_4));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ AddImmediate(index_reg, Immediate(offset_));
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectVolatile) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectAcquire))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        DCHECK(index_.IsRegisterPair());
        // UnsafeGet's offset location is a register pair, the low
        // part contains the correct offset.
        index = index_.ToLow();
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                          DataType::Type::kReference,
                          nullptr);
    parallel_move.AddMove(obj_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                          DataType::Type::kReference,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
    }
    x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86"; }

 private:
  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(ref_.AsRegister<Register>());
    size_t obj = static_cast<int>(obj_.AsRegister<Register>());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return static_cast<Register>(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on x86
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
};

// Slow path generating a read barrier for a GC root.
class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
 public:
  ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
      : SlowPathCode(instruction), out_(out), root_(root) {
    DCHECK(gUseReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Register reg_out = out_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
    x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
                               instruction_,
                               instruction_->GetDexPc(),
                               this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
};

class MethodEntryExitHooksSlowPathX86 : public SlowPathCode {
 public:
  explicit MethodEntryExitHooksSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    QuickEntrypointEnum entry_point =
        (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);
    if (instruction_->IsMethodExitHook()) {
      __ movl(EBX, Immediate(codegen->GetFrameSize()));
    }
    x86_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "MethodEntryExitHooksSlowPath";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86);
};

class CompileOptimizedSlowPathX86 : public SlowPathCode {
 public:
  CompileOptimizedSlowPathX86() : SlowPathCode(/* instruction= */ nullptr) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    __ Bind(GetEntryLabel());
    x86_codegen->GenerateInvokeRuntime(
        GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "CompileOptimizedSlowPath";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86);
};

#undef __
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT

inline Condition X86Condition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kLess;
    case kCondLE: return kLessEqual;
    case kCondGT: return kGreater;
    case kCondGE: return kGreaterEqual;
    case kCondB:  return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA:  return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

// Maps signed condition to unsigned condition and FP condition to x86 name.
inline Condition X86UnsignedOrFPCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    // Signed to unsigned, and FP to x86 name.
    case kCondLT: return kBelow;
    case kCondLE: return kBelowEqual;
    case kCondGT: return kAbove;
    case kCondGE: return kAboveEqual;
    // Unsigned remain unchanged.
    case kCondB:  return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA:  return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
  stream << Register(reg);
}

void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
  stream << XmmRegister(reg);
}

const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const {
  return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
}

size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
  return kX86WordSize;
}

size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
  return kX86WordSize;
}

size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  if (GetGraph()->HasSIMD()) {
    __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
  } else {
    __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
  }
  return GetSlowPathFPWidth();
}

size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  if (GetGraph()->HasSIMD()) {
    __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
  } else {
    __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
  }
  return GetSlowPathFPWidth();
}

void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
                                     HInstruction* instruction,
                                     uint32_t dex_pc,
                                     SlowPathCode* slow_path) {
  ValidateInvokeRuntime(entrypoint, instruction, slow_path);
  GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value());
  if (EntrypointRequiresStackMap(entrypoint)) {
    RecordPcInfo(instruction, dex_pc, slow_path);
  }
}

void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                                           HInstruction* instruction,
                                                           SlowPathCode* slow_path) {
  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
  GenerateInvokeRuntime(entry_point_offset);
}

void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
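  // On 32-bit x86 the Thread* is reachable through the fs segment register, so the
  // entrypoint is invoked with an fs-relative absolute call at its offset within Thread.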
  __ fs()->call(Address::Absolute(entry_point_offset));
}

namespace detail {
// Mark which intrinsics we don't have handcrafted code for.
template <Intrinsics T>
struct IsUnimplemented {
  bool is_unimplemented = false;
};

#define TRUE_OVERRIDE(Name)                     \
  template <>                                   \
  struct IsUnimplemented<Intrinsics::k##Name> { \
    bool is_unimplemented = true;               \
  };
UNIMPLEMENTED_INTRINSIC_LIST_X86(TRUE_OVERRIDE)
#undef TRUE_OVERRIDE

#include "intrinsics_list.h"
static constexpr bool kIsIntrinsicUnimplemented[] = {
  false,  // kNone
#define IS_UNIMPLEMENTED(Intrinsic, ...) \
  IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
  INTRINSICS_LIST(IS_UNIMPLEMENTED)
#undef IS_UNIMPLEMENTED
};
#undef INTRINSICS_LIST

}  // namespace detail

CodeGeneratorX86(HGraph * graph,const CompilerOptions & compiler_options,OptimizingCompilerStats * stats)1136 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
1137                                    const CompilerOptions& compiler_options,
1138                                    OptimizingCompilerStats* stats)
1139     : CodeGenerator(graph,
1140                     kNumberOfCpuRegisters,
1141                     kNumberOfXmmRegisters,
1142                     kNumberOfRegisterPairs,
1143                     ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1144                                         arraysize(kCoreCalleeSaves))
1145                         | (1 << kFakeReturnRegister),
1146                     0,
1147                     compiler_options,
1148                     stats,
1149                     ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1150       block_labels_(nullptr),
1151       location_builder_(graph, this),
1152       instruction_visitor_(graph, this),
1153       move_resolver_(graph->GetAllocator(), this),
1154       assembler_(graph->GetAllocator(),
1155                  compiler_options.GetInstructionSetFeatures()->AsX86InstructionSetFeatures()),
1156       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1157       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1158       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1159       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1160       public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1161       package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1162       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1163       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1164       boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1165       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1166       jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1167       jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1168       constant_area_start_(-1),
1169       fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1170       method_address_offset_(std::less<uint32_t>(),
1171                              graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1172   // Use a fake return address register to mimic Quick.
1173   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1174 }
1175 
SetupBlockedRegisters() const1176 void CodeGeneratorX86::SetupBlockedRegisters() const {
1177   // Stack register is always reserved.
1178   blocked_core_registers_[ESP] = true;
1179 }
1180 
InstructionCodeGeneratorX86(HGraph * graph,CodeGeneratorX86 * codegen)1181 InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
1182       : InstructionCodeGenerator(graph, codegen),
1183         assembler_(codegen->GetAssembler()),
1184         codegen_(codegen) {}
1185 
1186 static dwarf::Reg DWARFReg(Register reg) {
1187   return dwarf::Reg::X86Core(static_cast<int>(reg));
1188 }
1189 
1190 void SetInForReturnValue(HInstruction* ret, LocationSummary* locations) {
1191   switch (ret->InputAt(0)->GetType()) {
1192     case DataType::Type::kReference:
1193     case DataType::Type::kBool:
1194     case DataType::Type::kUint8:
1195     case DataType::Type::kInt8:
1196     case DataType::Type::kUint16:
1197     case DataType::Type::kInt16:
1198     case DataType::Type::kInt32:
1199       locations->SetInAt(0, Location::RegisterLocation(EAX));
1200       break;
1201 
1202     case DataType::Type::kInt64:
1203       locations->SetInAt(0, Location::RegisterPairLocation(EAX, EDX));
1204       break;
1205 
1206     case DataType::Type::kFloat32:
1207     case DataType::Type::kFloat64:
1208       locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
1209       break;
1210 
1211     case DataType::Type::kVoid:
1212       locations->SetInAt(0, Location::NoLocation());
1213       break;
1214 
1215     default:
1216       LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
1217   }
1218 }
1219 
1220 void LocationsBuilderX86::VisitMethodExitHook(HMethodExitHook* method_hook) {
1221   LocationSummary* locations = new (GetGraph()->GetAllocator())
1222       LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1223   SetInForReturnValue(method_hook, locations);
1224 }
1225 
1226 void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* instruction) {
1227   SlowPathCode* slow_path =
1228       new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction);
1229   codegen_->AddSlowPath(slow_path);
1230 
1231   if (instruction->IsMethodExitHook()) {
1232     // Check if we are required to check whether the caller needs a deoptimization. Strictly
1233     // speaking, testing the CheckCallerForDeopt bit would be sufficient, but it is faster to test
1234     // the whole flag for a non-zero value. The kCHA bit isn't used in debuggable runtimes, since
1235     // CHA optimization is disabled there, and the other bit is only set when this method itself
1236     // requires a deoptimization due to redefinition. So it is safe to just check for non-zero here.
1237     __ cmpl(Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1238     __ j(kNotEqual, slow_path->GetEntryLabel());
1239   }
1240 
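  // Check whether any method entry/exit listeners are installed. The listener flag is a single
  // byte in the global Instrumentation object, so a cmpb against its absolute address suffices;
  // if it is non-zero, jump to the slow path, which hands the entry/exit notification to the
  // runtime.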
1241   uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
1242   MemberOffset  offset = instruction->IsMethodExitHook() ?
1243       instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
1244       instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
1245   __ cmpb(Address::Absolute(address + offset.Int32Value()), Immediate(0));
1246   __ j(kNotEqual, slow_path->GetEntryLabel());
1247   __ Bind(slow_path->GetExitLabel());
1248 }
1249 
1250 void InstructionCodeGeneratorX86::VisitMethodExitHook(HMethodExitHook* instruction) {
1251   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1252   DCHECK(codegen_->RequiresCurrentMethod());
1253   GenerateMethodEntryExitHook(instruction);
1254 }
1255 
1256 void LocationsBuilderX86::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1257   new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1258 }
1259 
1260 void InstructionCodeGeneratorX86::VisitMethodEntryHook(HMethodEntryHook* instruction) {
1261   DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1262   DCHECK(codegen_->RequiresCurrentMethod());
1263   GenerateMethodEntryExitHook(instruction);
1264 }
1265 
1266 void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
1267   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1268     Register reg = EAX;
1269     if (is_frame_entry) {
1270       reg = kMethodRegisterArgument;
1271     } else {
1272       __ pushl(EAX);
1273       __ cfi().AdjustCFAOffset(4);
1274       __ movl(EAX, Address(ESP, kX86WordSize));
1275     }
1276     NearLabel overflow;
1277     __ cmpw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
1278             Immediate(interpreter::kNterpHotnessValue));
1279     __ j(kEqual, &overflow);
1280     __ addw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(-1));
1281     __ Bind(&overflow);
1282     if (!is_frame_entry) {
1283       __ popl(EAX);
1284       __ cfi().AdjustCFAOffset(-4);
1285     }
1286   }
1287 
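  // When compiling baseline code under the JIT, also decrement the baseline hotness counter in
  // the method's ProfilingInfo; once it reaches zero, the CompileOptimizedSlowPathX86 below is
  // taken to request an optimized compilation of this method.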
1288   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1289     SlowPathCode* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathX86();
1290     AddSlowPath(slow_path);
1291     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1292     DCHECK(info != nullptr);
1293     uint32_t address = reinterpret_cast32<uint32_t>(info) +
1294         ProfilingInfo::BaselineHotnessCountOffset().Int32Value();
1295     DCHECK(!HasEmptyFrame());
1296     // With multiple threads, this counter can overflow. That is OK: we will eventually get
1297     // to see it reach 0 again. Also, at this point we have no register available to look
1298     // at the counter directly.
1299     __ addw(Address::Absolute(address), Immediate(-1));
1300     __ j(kEqual, slow_path->GetEntryLabel());
1301     __ Bind(slow_path->GetExitLabel());
1302   }
1303 }
1304 
1305 void CodeGeneratorX86::GenerateFrameEntry() {
1306   __ cfi().SetCurrentCFAOffset(kX86WordSize);  // return address
1307 
1308   // Check if we need to generate the clinit check. We will jump to the
1309   // resolution stub if the class is not initialized and the executing thread is
1310   // not the thread initializing it.
1311   // We do this before constructing the frame to get the correct stack trace if
1312   // an exception is thrown.
1313   if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
1314     NearLabel continue_execution, resolution;
1315     // We'll use EBP as a temporary.
1316     __ pushl(EBP);
1317     // Check if we're visibly initialized.
1318 
1319     // We don't emit a read barrier here to save on code size. We rely on the
1320     // resolution trampoline to do a suspend check before re-entering this code.
1321     __ movl(EBP, Address(kMethodRegisterArgument, ArtMethod::DeclaringClassOffset().Int32Value()));
1322     __ cmpb(Address(EBP,  status_byte_offset), Immediate(shifted_visibly_initialized_value));
1323     __ j(kAboveEqual, &continue_execution);
1324 
1325     // Check if we're initializing and the thread initializing is the one
1326     // executing the code.
1327     __ cmpb(Address(EBP,  status_byte_offset), Immediate(shifted_initializing_value));
1328     __ j(kBelow, &resolution);
1329 
1330     __ movl(EBP, Address(EBP, mirror::Class::ClinitThreadIdOffset().Int32Value()));
1331     __ fs()->cmpl(EBP, Address::Absolute(Thread::TidOffset<kX86PointerSize>().Int32Value()));
1332     __ j(kEqual, &continue_execution);
1333     __ Bind(&resolution);
1334 
1335     __ popl(EBP);
1336     // Jump to the resolution stub.
1337     ThreadOffset32 entrypoint_offset =
1338         GetThreadOffset<kX86PointerSize>(kQuickQuickResolutionTrampoline);
1339     __ fs()->jmp(Address::Absolute(entrypoint_offset));
1340 
1341     __ Bind(&continue_execution);
1342     __ popl(EBP);
1343   }
1344 
1345   __ Bind(&frame_entry_label_);
1346   bool skip_overflow_check =
1347       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
1348   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1349 
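  // Implicit stack overflow check, done via a memory probe: touch the far end of the stack
  // region this method may use. If the stack has overflowed, the access below hits a protected
  // guard page and faults; the runtime's fault handler is expected to turn this into a
  // StackOverflowError using the stack map recorded by RecordPcInfo.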
1350   if (!skip_overflow_check) {
1351     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
1352     __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
1353     RecordPcInfo(nullptr, 0);
1354   }
1355 
1356   if (!HasEmptyFrame()) {
1357     for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1358       Register reg = kCoreCalleeSaves[i];
1359       if (allocated_registers_.ContainsCoreRegister(reg)) {
1360         __ pushl(reg);
1361         __ cfi().AdjustCFAOffset(kX86WordSize);
1362         __ cfi().RelOffset(DWARFReg(reg), 0);
1363       }
1364     }
1365 
1366     int adjust = GetFrameSize() - FrameEntrySpillSize();
1367     IncreaseFrame(adjust);
1368     // Save the current method if we need it. Note that we do not
1369     // do this in HCurrentMethod, as the instruction might have been removed
1370     // in the SSA graph.
1371     if (RequiresCurrentMethod()) {
1372       __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
1373     }
1374 
1375     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1376       // Initialize should_deoptimize flag to 0.
1377       __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1378     }
1379   }
1380 
1381   MaybeIncrementHotness(/* is_frame_entry= */ true);
1382 }
1383 
1384 void CodeGeneratorX86::GenerateFrameExit() {
1385   __ cfi().RememberState();
1386   if (!HasEmptyFrame()) {
1387     int adjust = GetFrameSize() - FrameEntrySpillSize();
1388     DecreaseFrame(adjust);
1389 
1390     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1391       Register reg = kCoreCalleeSaves[i];
1392       if (allocated_registers_.ContainsCoreRegister(reg)) {
1393         __ popl(reg);
1394         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
1395         __ cfi().Restore(DWARFReg(reg));
1396       }
1397     }
1398   }
1399   __ ret();
1400   __ cfi().RestoreState();
1401   __ cfi().DefCFAOffset(GetFrameSize());
1402 }
1403 
1404 void CodeGeneratorX86::Bind(HBasicBlock* block) {
1405   __ Bind(GetLabelOf(block));
1406 }
1407 
1408 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1409   switch (type) {
1410     case DataType::Type::kReference:
1411     case DataType::Type::kBool:
1412     case DataType::Type::kUint8:
1413     case DataType::Type::kInt8:
1414     case DataType::Type::kUint16:
1415     case DataType::Type::kInt16:
1416     case DataType::Type::kUint32:
1417     case DataType::Type::kInt32:
1418       return Location::RegisterLocation(EAX);
1419 
1420     case DataType::Type::kUint64:
1421     case DataType::Type::kInt64:
1422       return Location::RegisterPairLocation(EAX, EDX);
1423 
1424     case DataType::Type::kVoid:
1425       return Location::NoLocation();
1426 
1427     case DataType::Type::kFloat64:
1428     case DataType::Type::kFloat32:
1429       return Location::FpuRegisterLocation(XMM0);
1430   }
1431 
1432   UNREACHABLE();
1433 }
1434 
1435 Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const {
1436   return Location::RegisterLocation(kMethodRegisterArgument);
1437 }
1438 
1439 Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1440   switch (type) {
1441     case DataType::Type::kReference:
1442     case DataType::Type::kBool:
1443     case DataType::Type::kUint8:
1444     case DataType::Type::kInt8:
1445     case DataType::Type::kUint16:
1446     case DataType::Type::kInt16:
1447     case DataType::Type::kInt32: {
1448       uint32_t index = gp_index_++;
1449       stack_index_++;
1450       if (index < calling_convention.GetNumberOfRegisters()) {
1451         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
1452       } else {
1453         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1454       }
1455     }
1456 
1457     case DataType::Type::kInt64: {
1458       uint32_t index = gp_index_;
1459       gp_index_ += 2;
1460       stack_index_ += 2;
1461       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
1462         X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
1463             calling_convention.GetRegisterPairAt(index));
1464         return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
1465       } else {
1466         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1467       }
1468     }
1469 
1470     case DataType::Type::kFloat32: {
1471       uint32_t index = float_index_++;
1472       stack_index_++;
1473       if (index < calling_convention.GetNumberOfFpuRegisters()) {
1474         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1475       } else {
1476         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1477       }
1478     }
1479 
1480     case DataType::Type::kFloat64: {
1481       uint32_t index = float_index_++;
1482       stack_index_ += 2;
1483       if (index < calling_convention.GetNumberOfFpuRegisters()) {
1484         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1485       } else {
1486         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1487       }
1488     }
1489 
1490     case DataType::Type::kUint32:
1491     case DataType::Type::kUint64:
1492     case DataType::Type::kVoid:
1493       LOG(FATAL) << "Unexpected parameter type " << type;
1494       UNREACHABLE();
1495   }
1496   return Location::NoLocation();
1497 }
1498 
1499 Location CriticalNativeCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1500   DCHECK_NE(type, DataType::Type::kReference);
1501 
1502   Location location;
1503   if (DataType::Is64BitType(type)) {
1504     location = Location::DoubleStackSlot(stack_offset_);
1505     stack_offset_ += 2 * kFramePointerSize;
1506   } else {
1507     location = Location::StackSlot(stack_offset_);
1508     stack_offset_ += kFramePointerSize;
1509   }
1510   if (for_register_allocation_) {
1511     location = Location::Any();
1512   }
1513   return location;
1514 }
1515 
1516 Location CriticalNativeCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1517   // We perform conversion to the managed ABI return register after the call if needed.
1518   InvokeDexCallingConventionVisitorX86 dex_calling_convention;
1519   return dex_calling_convention.GetReturnLocation(type);
1520 }
1521 
1522 Location CriticalNativeCallingConventionVisitorX86::GetMethodLocation() const {
1523   // Pass the method in the hidden argument EAX.
1524   return Location::RegisterLocation(EAX);
1525 }
1526 
1527 void CodeGeneratorX86::Move32(Location destination, Location source) {
1528   if (source.Equals(destination)) {
1529     return;
1530   }
1531   if (destination.IsRegister()) {
1532     if (source.IsRegister()) {
1533       __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
1534     } else if (source.IsFpuRegister()) {
1535       __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
1536     } else if (source.IsConstant()) {
1537       int32_t value = GetInt32ValueOf(source.GetConstant());
1538       __ movl(destination.AsRegister<Register>(), Immediate(value));
1539     } else {
1540       DCHECK(source.IsStackSlot());
1541       __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
1542     }
1543   } else if (destination.IsFpuRegister()) {
1544     if (source.IsRegister()) {
1545       __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
1546     } else if (source.IsFpuRegister()) {
1547       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1548     } else {
1549       DCHECK(source.IsStackSlot());
1550       __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1551     }
1552   } else {
1553     DCHECK(destination.IsStackSlot()) << destination;
1554     if (source.IsRegister()) {
1555       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
1556     } else if (source.IsFpuRegister()) {
1557       __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1558     } else if (source.IsConstant()) {
1559       HConstant* constant = source.GetConstant();
1560       int32_t value = GetInt32ValueOf(constant);
1561       __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
1562     } else {
1563       DCHECK(source.IsStackSlot());
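      // x86 cannot move directly from memory to memory, so bounce the value through the stack
      // with a push/pop pair. The destination offset needs no adjustment because the pop's
      // ESP-relative address is computed after ESP has been incremented back to its old value.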
1564       __ pushl(Address(ESP, source.GetStackIndex()));
1565       __ popl(Address(ESP, destination.GetStackIndex()));
1566     }
1567   }
1568 }
1569 
1570 void CodeGeneratorX86::Move64(Location destination, Location source) {
1571   if (source.Equals(destination)) {
1572     return;
1573   }
1574   if (destination.IsRegisterPair()) {
1575     if (source.IsRegisterPair()) {
1576       EmitParallelMoves(
1577           Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
1578           Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
1579           DataType::Type::kInt32,
1580           Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
1581           Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
1582           DataType::Type::kInt32);
1583     } else if (source.IsFpuRegister()) {
1584       XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
1585       __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
1586       __ psrlq(src_reg, Immediate(32));
1587       __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
1588     } else {
1589       // No conflict possible, so just do the moves.
1590       DCHECK(source.IsDoubleStackSlot());
1591       __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
1592       __ movl(destination.AsRegisterPairHigh<Register>(),
1593               Address(ESP, source.GetHighStackIndex(kX86WordSize)));
1594     }
1595   } else if (destination.IsFpuRegister()) {
1596     if (source.IsFpuRegister()) {
1597       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1598     } else if (source.IsDoubleStackSlot()) {
1599       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1600     } else if (source.IsRegisterPair()) {
1601       size_t elem_size = DataType::Size(DataType::Type::kInt32);
1602       // Push the 2 source registers to the stack.
1603       __ pushl(source.AsRegisterPairHigh<Register>());
1604       __ cfi().AdjustCFAOffset(elem_size);
1605       __ pushl(source.AsRegisterPairLow<Register>());
1606       __ cfi().AdjustCFAOffset(elem_size);
1607       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
1608       // And remove the temporary stack space we allocated.
1609       DecreaseFrame(2 * elem_size);
1610     } else {
1611       LOG(FATAL) << "Unimplemented";
1612     }
1613   } else {
1614     DCHECK(destination.IsDoubleStackSlot()) << destination;
1615     if (source.IsRegisterPair()) {
1616       // No conflict possible, so just do the moves.
1617       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
1618       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1619               source.AsRegisterPairHigh<Register>());
1620     } else if (source.IsFpuRegister()) {
1621       __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1622     } else if (source.IsConstant()) {
1623       HConstant* constant = source.GetConstant();
1624       DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1625       int64_t value = GetInt64ValueOf(constant);
1626       __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
1627       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1628               Immediate(High32Bits(value)));
1629     } else {
1630       DCHECK(source.IsDoubleStackSlot()) << source;
1631       EmitParallelMoves(
1632           Location::StackSlot(source.GetStackIndex()),
1633           Location::StackSlot(destination.GetStackIndex()),
1634           DataType::Type::kInt32,
1635           Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
1636           Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
1637           DataType::Type::kInt32);
1638     }
1639   }
1640 }
1641 
1642 static Address CreateAddress(Register base,
1643                              Register index = Register::kNoRegister,
1644                              ScaleFactor scale = TIMES_1,
1645                              int32_t disp = 0) {
1646   if (index == Register::kNoRegister) {
1647     return Address(base, disp);
1648   }
1649 
1650   return Address(base, index, scale, disp);
1651 }
1652 
1653 void CodeGeneratorX86::LoadFromMemoryNoBarrier(DataType::Type dst_type,
1654                                                Location dst,
1655                                                Address src,
1656                                                HInstruction* instr,
1657                                                XmmRegister temp,
1658                                                bool is_atomic_load) {
1659   switch (dst_type) {
1660     case DataType::Type::kBool:
1661     case DataType::Type::kUint8:
1662       __ movzxb(dst.AsRegister<Register>(), src);
1663       break;
1664     case DataType::Type::kInt8:
1665       __ movsxb(dst.AsRegister<Register>(), src);
1666       break;
1667     case DataType::Type::kInt16:
1668       __ movsxw(dst.AsRegister<Register>(), src);
1669       break;
1670     case DataType::Type::kUint16:
1671       __ movzxw(dst.AsRegister<Register>(), src);
1672       break;
1673     case DataType::Type::kInt32:
1674       __ movl(dst.AsRegister<Register>(), src);
1675       break;
1676     case DataType::Type::kInt64: {
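      // On x86-32 a 64-bit value normally needs two 32-bit loads, which is not atomic. For an
      // atomic load, read the whole value with one SSE load into `temp` and split it into the
      // destination register pair afterwards.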
1677       if (is_atomic_load) {
1678         __ movsd(temp, src);
1679         if (instr != nullptr) {
1680           MaybeRecordImplicitNullCheck(instr);
1681         }
1682         __ movd(dst.AsRegisterPairLow<Register>(), temp);
1683         __ psrlq(temp, Immediate(32));
1684         __ movd(dst.AsRegisterPairHigh<Register>(), temp);
1685       } else {
1686         DCHECK_NE(src.GetBaseRegister(), dst.AsRegisterPairLow<Register>());
1687         Address src_high = Address::displace(src, kX86WordSize);
1688         __ movl(dst.AsRegisterPairLow<Register>(), src);
1689         if (instr != nullptr) {
1690           MaybeRecordImplicitNullCheck(instr);
1691         }
1692         __ movl(dst.AsRegisterPairHigh<Register>(), src_high);
1693       }
1694       break;
1695     }
1696     case DataType::Type::kFloat32:
1697       __ movss(dst.AsFpuRegister<XmmRegister>(), src);
1698       break;
1699     case DataType::Type::kFloat64:
1700       __ movsd(dst.AsFpuRegister<XmmRegister>(), src);
1701       break;
1702     case DataType::Type::kReference:
1703       DCHECK(!gUseReadBarrier);
1704       __ movl(dst.AsRegister<Register>(), src);
1705       __ MaybeUnpoisonHeapReference(dst.AsRegister<Register>());
1706       break;
1707     default:
1708       LOG(FATAL) << "Unreachable type " << dst_type;
1709   }
1710   if (instr != nullptr && dst_type != DataType::Type::kInt64) {
1711     // kInt64 needs special handling that is done in the above switch.
1712     MaybeRecordImplicitNullCheck(instr);
1713   }
1714 }
1715 
1716 void CodeGeneratorX86::MoveToMemory(DataType::Type src_type,
1717                                     Location src,
1718                                     Register dst_base,
1719                                     Register dst_index,
1720                                     ScaleFactor dst_scale,
1721                                     int32_t dst_disp) {
1722   DCHECK(dst_base != Register::kNoRegister);
1723   Address dst = CreateAddress(dst_base, dst_index, dst_scale, dst_disp);
1724 
1725   switch (src_type) {
1726     case DataType::Type::kBool:
1727     case DataType::Type::kUint8:
1728     case DataType::Type::kInt8: {
1729       if (src.IsConstant()) {
1730         __ movb(dst, Immediate(CodeGenerator::GetInt8ValueOf(src.GetConstant())));
1731       } else {
1732         __ movb(dst, src.AsRegister<ByteRegister>());
1733       }
1734       break;
1735     }
1736     case DataType::Type::kUint16:
1737     case DataType::Type::kInt16: {
1738       if (src.IsConstant()) {
1739         __ movw(dst, Immediate(CodeGenerator::GetInt16ValueOf(src.GetConstant())));
1740       } else {
1741         __ movw(dst, src.AsRegister<Register>());
1742       }
1743       break;
1744     }
1745     case DataType::Type::kUint32:
1746     case DataType::Type::kInt32: {
1747       if (src.IsConstant()) {
1748         int32_t v = CodeGenerator::GetInt32ValueOf(src.GetConstant());
1749         __ movl(dst, Immediate(v));
1750       } else {
1751         __ movl(dst, src.AsRegister<Register>());
1752       }
1753       break;
1754     }
1755     case DataType::Type::kUint64:
1756     case DataType::Type::kInt64: {
1757       Address dst_next_4_bytes = CreateAddress(dst_base, dst_index, dst_scale, dst_disp + 4);
1758       if (src.IsConstant()) {
1759         int64_t v = CodeGenerator::GetInt64ValueOf(src.GetConstant());
1760         __ movl(dst, Immediate(Low32Bits(v)));
1761         __ movl(dst_next_4_bytes, Immediate(High32Bits(v)));
1762       } else {
1763         __ movl(dst, src.AsRegisterPairLow<Register>());
1764         __ movl(dst_next_4_bytes, src.AsRegisterPairHigh<Register>());
1765       }
1766       break;
1767     }
1768     case DataType::Type::kFloat32: {
1769       if (src.IsConstant()) {
1770         int32_t v = CodeGenerator::GetInt32ValueOf(src.GetConstant());
1771         __ movl(dst, Immediate(v));
1772       } else {
1773         __ movss(dst, src.AsFpuRegister<XmmRegister>());
1774       }
1775       break;
1776     }
1777     case DataType::Type::kFloat64: {
1778       Address dst_next_4_bytes = CreateAddress(dst_base, dst_index, dst_scale, dst_disp + 4);
1779       if (src.IsConstant()) {
1780         int64_t v = CodeGenerator::GetInt64ValueOf(src.GetConstant());
1781         __ movl(dst, Immediate(Low32Bits(v)));
1782         __ movl(dst_next_4_bytes, Immediate(High32Bits(v)));
1783       } else {
1784         __ movsd(dst, src.AsFpuRegister<XmmRegister>());
1785       }
1786       break;
1787     }
1788     case DataType::Type::kVoid:
1789     case DataType::Type::kReference:
1790       LOG(FATAL) << "Unreachable type " << src_type;
1791   }
1792 }
1793 
1794 void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
1795   DCHECK(location.IsRegister());
1796   __ movl(location.AsRegister<Register>(), Immediate(value));
1797 }
1798 
1799 void CodeGeneratorX86::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
1800   HParallelMove move(GetGraph()->GetAllocator());
1801   if (dst_type == DataType::Type::kInt64 && !src.IsConstant() && !src.IsFpuRegister()) {
1802     move.AddMove(src.ToLow(), dst.ToLow(), DataType::Type::kInt32, nullptr);
1803     move.AddMove(src.ToHigh(), dst.ToHigh(), DataType::Type::kInt32, nullptr);
1804   } else {
1805     move.AddMove(src, dst, dst_type, nullptr);
1806   }
1807   GetMoveResolver()->EmitNativeCode(&move);
1808 }
1809 
1810 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
1811   if (location.IsRegister()) {
1812     locations->AddTemp(location);
1813   } else if (location.IsRegisterPair()) {
1814     locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
1815     locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
1816   } else {
1817     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1818   }
1819 }
1820 
1821 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1822   if (successor->IsExitBlock()) {
1823     DCHECK(got->GetPrevious()->AlwaysThrows());
1824     return;  // no code needed
1825   }
1826 
1827   HBasicBlock* block = got->GetBlock();
1828   HInstruction* previous = got->GetPrevious();
1829 
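  // For a loop back edge that has a suspend check, bump the hotness counter and generate the
  // suspend check in place of a plain jump to the successor.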
1830   HLoopInformation* info = block->GetLoopInformation();
1831   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1832     codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
1833     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1834     return;
1835   }
1836 
1837   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1838     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1839   }
1840   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1841     __ jmp(codegen_->GetLabelOf(successor));
1842   }
1843 }
1844 
1845 void LocationsBuilderX86::VisitGoto(HGoto* got) {
1846   got->SetLocations(nullptr);
1847 }
1848 
1849 void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
1850   HandleGoto(got, got->GetSuccessor());
1851 }
1852 
1853 void LocationsBuilderX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1854   try_boundary->SetLocations(nullptr);
1855 }
1856 
1857 void InstructionCodeGeneratorX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1858   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1859   if (!successor->IsExitBlock()) {
1860     HandleGoto(try_boundary, successor);
1861   }
1862 }
1863 
1864 void LocationsBuilderX86::VisitExit(HExit* exit) {
1865   exit->SetLocations(nullptr);
1866 }
1867 
1868 void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1869 }
1870 
1871 template<class LabelType>
1872 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
1873                                                   LabelType* true_label,
1874                                                   LabelType* false_label) {
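  // ucomiss/ucomisd report NaN operands as "unordered", so NaN must be routed explicitly:
  // e.g. `!=` is true if either operand is NaN, while `<` or `>` is false.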
1875   if (cond->IsFPConditionTrueIfNaN()) {
1876     __ j(kUnordered, true_label);
1877   } else if (cond->IsFPConditionFalseIfNaN()) {
1878     __ j(kUnordered, false_label);
1879   }
1880   __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
1881 }
1882 
1883 template<class LabelType>
1884 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
1885                                                                LabelType* true_label,
1886                                                                LabelType* false_label) {
1887   LocationSummary* locations = cond->GetLocations();
1888   Location left = locations->InAt(0);
1889   Location right = locations->InAt(1);
1890   IfCondition if_cond = cond->GetCondition();
1891 
1892   Register left_high = left.AsRegisterPairHigh<Register>();
1893   Register left_low = left.AsRegisterPairLow<Register>();
1894   IfCondition true_high_cond = if_cond;
1895   IfCondition false_high_cond = cond->GetOppositeCondition();
1896   Condition final_condition = X86UnsignedOrFPCondition(if_cond);  // unsigned on lower part
1897 
1898   // Set the conditions for the test, remembering that == needs to be
1899   // decided using the low words.
1900   switch (if_cond) {
1901     case kCondEQ:
1902     case kCondNE:
1903       // Nothing to do.
1904       break;
1905     case kCondLT:
1906       false_high_cond = kCondGT;
1907       break;
1908     case kCondLE:
1909       true_high_cond = kCondLT;
1910       break;
1911     case kCondGT:
1912       false_high_cond = kCondLT;
1913       break;
1914     case kCondGE:
1915       true_high_cond = kCondGT;
1916       break;
1917     case kCondB:
1918       false_high_cond = kCondA;
1919       break;
1920     case kCondBE:
1921       true_high_cond = kCondB;
1922       break;
1923     case kCondA:
1924       false_high_cond = kCondB;
1925       break;
1926     case kCondAE:
1927       true_high_cond = kCondA;
1928       break;
1929   }
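  // Illustrative example: for kCondLT (signed a < b), true_high_cond stays kCondLT and
  // false_high_cond becomes kCondGT. If the high words are equal, the decision falls to the
  // low words, which compare as unsigned, hence final_condition is kBelow.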
1930 
1931   if (right.IsConstant()) {
1932     int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1933     int32_t val_high = High32Bits(value);
1934     int32_t val_low = Low32Bits(value);
1935 
1936     codegen_->Compare32BitValue(left_high, val_high);
1937     if (if_cond == kCondNE) {
1938       __ j(X86Condition(true_high_cond), true_label);
1939     } else if (if_cond == kCondEQ) {
1940       __ j(X86Condition(false_high_cond), false_label);
1941     } else {
1942       __ j(X86Condition(true_high_cond), true_label);
1943       __ j(X86Condition(false_high_cond), false_label);
1944     }
1945     // Must be equal high, so compare the lows.
1946     codegen_->Compare32BitValue(left_low, val_low);
1947   } else if (right.IsRegisterPair()) {
1948     Register right_high = right.AsRegisterPairHigh<Register>();
1949     Register right_low = right.AsRegisterPairLow<Register>();
1950 
1951     __ cmpl(left_high, right_high);
1952     if (if_cond == kCondNE) {
1953       __ j(X86Condition(true_high_cond), true_label);
1954     } else if (if_cond == kCondEQ) {
1955       __ j(X86Condition(false_high_cond), false_label);
1956     } else {
1957       __ j(X86Condition(true_high_cond), true_label);
1958       __ j(X86Condition(false_high_cond), false_label);
1959     }
1960     // Must be equal high, so compare the lows.
1961     __ cmpl(left_low, right_low);
1962   } else {
1963     DCHECK(right.IsDoubleStackSlot());
1964     __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
1965     if (if_cond == kCondNE) {
1966       __ j(X86Condition(true_high_cond), true_label);
1967     } else if (if_cond == kCondEQ) {
1968       __ j(X86Condition(false_high_cond), false_label);
1969     } else {
1970       __ j(X86Condition(true_high_cond), true_label);
1971       __ j(X86Condition(false_high_cond), false_label);
1972     }
1973     // Must be equal high, so compare the lows.
1974     __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
1975   }
1976   // The last comparison might be unsigned.
1977   __ j(final_condition, true_label);
1978 }
1979 
1980 void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
1981                                                     Location rhs,
1982                                                     HInstruction* insn,
1983                                                     bool is_double) {
1984   HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable();
1985   if (is_double) {
1986     if (rhs.IsFpuRegister()) {
1987       __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1988     } else if (const_area != nullptr) {
1989       DCHECK(const_area->IsEmittedAtUseSite());
1990       __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
1991                  codegen_->LiteralDoubleAddress(
1992                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
1993                      const_area->GetBaseMethodAddress(),
1994                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1995     } else {
1996       DCHECK(rhs.IsDoubleStackSlot());
1997       __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1998     }
1999   } else {
2000     if (rhs.IsFpuRegister()) {
2001       __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
2002     } else if (const_area != nullptr) {
2003       DCHECK(const_area->IsEmittedAtUseSite());
2004       __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
2005                  codegen_->LiteralFloatAddress(
2006                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
2007                      const_area->GetBaseMethodAddress(),
2008                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
2009     } else {
2010       DCHECK(rhs.IsStackSlot());
2011       __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
2012     }
2013   }
2014 }
2015 
2016 template<class LabelType>
2017 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
2018                                                                LabelType* true_target_in,
2019                                                                LabelType* false_target_in) {
2020   // Generated branching requires both targets to be explicit. If either of the
2021   // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
2022   LabelType fallthrough_target;
2023   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
2024   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
2025 
2026   LocationSummary* locations = condition->GetLocations();
2027   Location left = locations->InAt(0);
2028   Location right = locations->InAt(1);
2029 
2030   DataType::Type type = condition->InputAt(0)->GetType();
2031   switch (type) {
2032     case DataType::Type::kInt64:
2033       GenerateLongComparesAndJumps(condition, true_target, false_target);
2034       break;
2035     case DataType::Type::kFloat32:
2036       GenerateFPCompare(left, right, condition, false);
2037       GenerateFPJumps(condition, true_target, false_target);
2038       break;
2039     case DataType::Type::kFloat64:
2040       GenerateFPCompare(left, right, condition, true);
2041       GenerateFPJumps(condition, true_target, false_target);
2042       break;
2043     default:
2044       LOG(FATAL) << "Unexpected compare type " << type;
2045   }
2046 
2047   if (false_target != &fallthrough_target) {
2048     __ jmp(false_target);
2049   }
2050 
2051   if (fallthrough_target.IsLinked()) {
2052     __ Bind(&fallthrough_target);
2053   }
2054 }
2055 
2056 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
2057   // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
2058   // are set only strictly before `branch`. We can't use the eflags on long/FP
2059   // conditions if they are materialized due to the complex branching.
2060   return cond->IsCondition() &&
2061          cond->GetNext() == branch &&
2062          cond->InputAt(0)->GetType() != DataType::Type::kInt64 &&
2063          !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
2064 }
2065 
2066 template<class LabelType>
2067 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
2068                                                         size_t condition_input_index,
2069                                                         LabelType* true_target,
2070                                                         LabelType* false_target) {
2071   HInstruction* cond = instruction->InputAt(condition_input_index);
2072 
2073   if (true_target == nullptr && false_target == nullptr) {
2074     // Nothing to do. The code always falls through.
2075     return;
2076   } else if (cond->IsIntConstant()) {
2077     // Constant condition, statically compared against "true" (integer value 1).
2078     if (cond->AsIntConstant()->IsTrue()) {
2079       if (true_target != nullptr) {
2080         __ jmp(true_target);
2081       }
2082     } else {
2083       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
2084       if (false_target != nullptr) {
2085         __ jmp(false_target);
2086       }
2087     }
2088     return;
2089   }
2090 
2091   // The following code generates these patterns:
2092   //  (1) true_target == nullptr && false_target != nullptr
2093   //        - opposite condition true => branch to false_target
2094   //  (2) true_target != nullptr && false_target == nullptr
2095   //        - condition true => branch to true_target
2096   //  (3) true_target != nullptr && false_target != nullptr
2097   //        - condition true => branch to true_target
2098   //        - branch to false_target
2099   if (IsBooleanValueOrMaterializedCondition(cond)) {
2100     if (AreEflagsSetFrom(cond, instruction)) {
2101       if (true_target == nullptr) {
2102         __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
2103       } else {
2104         __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
2105       }
2106     } else {
2107       // Materialized condition, compare against 0.
2108       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
2109       if (lhs.IsRegister()) {
2110         __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
2111       } else {
2112         __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
2113       }
2114       if (true_target == nullptr) {
2115         __ j(kEqual, false_target);
2116       } else {
2117         __ j(kNotEqual, true_target);
2118       }
2119     }
2120   } else {
2121     // Condition has not been materialized, use its inputs as the comparison and
2122     // its condition as the branch condition.
2123     HCondition* condition = cond->AsCondition();
2124 
2125     // If this is a long or FP comparison that has been folded into
2126     // the HCondition, generate the comparison directly.
2127     DataType::Type type = condition->InputAt(0)->GetType();
2128     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2129       GenerateCompareTestAndBranch(condition, true_target, false_target);
2130       return;
2131     }
2132 
2133     Location lhs = condition->GetLocations()->InAt(0);
2134     Location rhs = condition->GetLocations()->InAt(1);
2135     // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
2136     codegen_->GenerateIntCompare(lhs, rhs);
2137     if (true_target == nullptr) {
2138       __ j(X86Condition(condition->GetOppositeCondition()), false_target);
2139     } else {
2140       __ j(X86Condition(condition->GetCondition()), true_target);
2141     }
2142   }
2143 
2144   // If neither branch falls through (case 3), the conditional branch to `true_target`
2145   // was already emitted (case 2) and we need to emit a jump to `false_target`.
2146   if (true_target != nullptr && false_target != nullptr) {
2147     __ jmp(false_target);
2148   }
2149 }
2150 
2151 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
2152   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2153   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2154     locations->SetInAt(0, Location::Any());
2155   }
2156 }
2157 
2158 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
2159   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2160   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2161   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2162       nullptr : codegen_->GetLabelOf(true_successor);
2163   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2164       nullptr : codegen_->GetLabelOf(false_successor);
2165   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2166 }
2167 
2168 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
2169   LocationSummary* locations = new (GetGraph()->GetAllocator())
2170       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2171   InvokeRuntimeCallingConvention calling_convention;
2172   RegisterSet caller_saves = RegisterSet::Empty();
2173   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2174   locations->SetCustomSlowPathCallerSaves(caller_saves);
2175   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2176     locations->SetInAt(0, Location::Any());
2177   }
2178 }
2179 
2180 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
2181   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
2182   GenerateTestAndBranch<Label>(deoptimize,
2183                                /* condition_input_index= */ 0,
2184                                slow_path->GetEntryLabel(),
2185                                /* false_target= */ nullptr);
2186 }
2187 
2188 void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2189   LocationSummary* locations = new (GetGraph()->GetAllocator())
2190       LocationSummary(flag, LocationSummary::kNoCall);
2191   locations->SetOut(Location::RequiresRegister());
2192 }
2193 
2194 void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2195   __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
2196           Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2197 }
2198 
2199 static bool SelectCanUseCMOV(HSelect* select) {
2200   // There are no conditional move instructions for XMMs.
2201   if (DataType::IsFloatingPointType(select->GetType())) {
2202     return false;
2203   }
2204 
2205   // An FP condition doesn't generate the single CC that we need.
2206   // In 32 bit mode, a long condition doesn't generate a single CC either.
2207   HInstruction* condition = select->GetCondition();
2208   if (condition->IsCondition()) {
2209     DataType::Type compare_type = condition->InputAt(0)->GetType();
2210     if (compare_type == DataType::Type::kInt64 ||
2211         DataType::IsFloatingPointType(compare_type)) {
2212       return false;
2213     }
2214   }
2215 
2216   // We can generate a CMOV for this Select.
2217   return true;
2218 }
2219 
2220 void LocationsBuilderX86::VisitSelect(HSelect* select) {
2221   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2222   if (DataType::IsFloatingPointType(select->GetType())) {
2223     locations->SetInAt(0, Location::RequiresFpuRegister());
2224     locations->SetInAt(1, Location::Any());
2225   } else {
2226     locations->SetInAt(0, Location::RequiresRegister());
2227     if (SelectCanUseCMOV(select)) {
2228       if (select->InputAt(1)->IsConstant()) {
2229         // Cmov can't handle a constant value.
2230         locations->SetInAt(1, Location::RequiresRegister());
2231       } else {
2232         locations->SetInAt(1, Location::Any());
2233       }
2234     } else {
2235       locations->SetInAt(1, Location::Any());
2236     }
2237   }
2238   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2239     locations->SetInAt(2, Location::RequiresRegister());
2240   }
2241   locations->SetOut(Location::SameAsFirstInput());
2242 }
2243 
2244 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
2245   LocationSummary* locations = select->GetLocations();
2246   DCHECK(locations->InAt(0).Equals(locations->Out()));
2247   if (SelectCanUseCMOV(select)) {
2248     // If both the condition and the source types are integer, we can generate
2249     // a CMOV to implement Select.
2250 
2251     HInstruction* select_condition = select->GetCondition();
2252     Condition cond = kNotEqual;
2253 
2254     // Figure out how to test the 'condition'.
2255     if (select_condition->IsCondition()) {
2256       HCondition* condition = select_condition->AsCondition();
2257       if (!condition->IsEmittedAtUseSite()) {
2258         // This was a previously materialized condition.
2259         // Can we use the existing condition code?
2260         if (AreEflagsSetFrom(condition, select)) {
2261           // Materialization was the previous instruction. Condition codes are right.
2262           cond = X86Condition(condition->GetCondition());
2263         } else {
2264           // No, we have to recreate the condition code.
2265           Register cond_reg = locations->InAt(2).AsRegister<Register>();
2266           __ testl(cond_reg, cond_reg);
2267         }
2268       } else {
2269         // We can't handle FP or long here.
2270         DCHECK_NE(condition->InputAt(0)->GetType(), DataType::Type::kInt64);
2271         DCHECK(!DataType::IsFloatingPointType(condition->InputAt(0)->GetType()));
2272         LocationSummary* cond_locations = condition->GetLocations();
2273         codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
2274         cond = X86Condition(condition->GetCondition());
2275       }
2276     } else {
2277       // Must be a Boolean condition, which needs to be compared to 0.
2278       Register cond_reg = locations->InAt(2).AsRegister<Register>();
2279       __ testl(cond_reg, cond_reg);
2280     }
2281 
2282     // If the condition is true, overwrite the output, which already contains false.
2283     Location false_loc = locations->InAt(0);
2284     Location true_loc = locations->InAt(1);
2285     if (select->GetType() == DataType::Type::kInt64) {
2286       // 64 bit conditional move.
2287       Register false_high = false_loc.AsRegisterPairHigh<Register>();
2288       Register false_low = false_loc.AsRegisterPairLow<Register>();
2289       if (true_loc.IsRegisterPair()) {
2290         __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
2291         __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
2292       } else {
2293         __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
2294         __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
2295       }
2296     } else {
2297       // 32 bit conditional move.
2298       Register false_reg = false_loc.AsRegister<Register>();
2299       if (true_loc.IsRegister()) {
2300         __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
2301       } else {
2302         __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
2303       }
2304     }
2305   } else {
2306     NearLabel false_target;
2307     GenerateTestAndBranch<NearLabel>(
2308         select, /* condition_input_index= */ 2, /* true_target= */ nullptr, &false_target);
2309     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2310     __ Bind(&false_target);
2311   }
2312 }
2313 
2314 void LocationsBuilderX86::VisitNop(HNop* nop) {
2315   new (GetGraph()->GetAllocator()) LocationSummary(nop);
2316 }
2317 
2318 void InstructionCodeGeneratorX86::VisitNop(HNop*) {
2319   // The environment recording already happened in CodeGenerator::Compile.
2320 }
2321 
2322 void CodeGeneratorX86::IncreaseFrame(size_t adjustment) {
2323   __ subl(ESP, Immediate(adjustment));
2324   __ cfi().AdjustCFAOffset(adjustment);
2325 }
2326 
2327 void CodeGeneratorX86::DecreaseFrame(size_t adjustment) {
2328   __ addl(ESP, Immediate(adjustment));
2329   __ cfi().AdjustCFAOffset(-adjustment);
2330 }
2331 
2332 void CodeGeneratorX86::GenerateNop() {
2333   __ nop();
2334 }
2335 
2336 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
2337   LocationSummary* locations =
2338       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2339   // Handle the long/FP comparisons made in instruction simplification.
2340   switch (cond->InputAt(0)->GetType()) {
2341     case DataType::Type::kInt64: {
2342       locations->SetInAt(0, Location::RequiresRegister());
2343       locations->SetInAt(1, Location::Any());
2344       if (!cond->IsEmittedAtUseSite()) {
2345         locations->SetOut(Location::RequiresRegister());
2346       }
2347       break;
2348     }
2349     case DataType::Type::kFloat32:
2350     case DataType::Type::kFloat64: {
2351       locations->SetInAt(0, Location::RequiresFpuRegister());
2352       if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
2353         DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
2354       } else if (cond->InputAt(1)->IsConstant()) {
2355         locations->SetInAt(1, Location::RequiresFpuRegister());
2356       } else {
2357         locations->SetInAt(1, Location::Any());
2358       }
2359       if (!cond->IsEmittedAtUseSite()) {
2360         locations->SetOut(Location::RequiresRegister());
2361       }
2362       break;
2363     }
2364     default:
2365       locations->SetInAt(0, Location::RequiresRegister());
2366       locations->SetInAt(1, Location::Any());
2367       if (!cond->IsEmittedAtUseSite()) {
2368         // We need a byte register.
2369         locations->SetOut(Location::RegisterLocation(ECX));
2370       }
2371       break;
2372   }
2373 }
2374 
2375 void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
2376   if (cond->IsEmittedAtUseSite()) {
2377     return;
2378   }
2379 
2380   LocationSummary* locations = cond->GetLocations();
2381   Location lhs = locations->InAt(0);
2382   Location rhs = locations->InAt(1);
2383   Register reg = locations->Out().AsRegister<Register>();
2384   NearLabel true_label, false_label;
2385 
2386   switch (cond->InputAt(0)->GetType()) {
2387     default: {
2388       // Integer case.
2389 
2390       // Clear output register: setb only sets the low byte.
2391       __ xorl(reg, reg);
2392       codegen_->GenerateIntCompare(lhs, rhs);
2393       __ setb(X86Condition(cond->GetCondition()), reg);
2394       return;
2395     }
2396     case DataType::Type::kInt64:
2397       GenerateLongComparesAndJumps(cond, &true_label, &false_label);
2398       break;
2399     case DataType::Type::kFloat32:
2400       GenerateFPCompare(lhs, rhs, cond, false);
2401       GenerateFPJumps(cond, &true_label, &false_label);
2402       break;
2403     case DataType::Type::kFloat64:
2404       GenerateFPCompare(lhs, rhs, cond, true);
2405       GenerateFPJumps(cond, &true_label, &false_label);
2406       break;
2407   }
2408 
2409   // Convert the jumps into the result.
2410   NearLabel done_label;
2411 
2412   // False case: result = 0.
2413   __ Bind(&false_label);
2414   __ xorl(reg, reg);
2415   __ jmp(&done_label);
2416 
2417   // True case: result = 1.
2418   __ Bind(&true_label);
2419   __ movl(reg, Immediate(1));
2420   __ Bind(&done_label);
2421 }
2422 
2423 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
2424   HandleCondition(comp);
2425 }
2426 
2427 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
2428   HandleCondition(comp);
2429 }
2430 
2431 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
2432   HandleCondition(comp);
2433 }
2434 
2435 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
2436   HandleCondition(comp);
2437 }
2438 
2439 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
2440   HandleCondition(comp);
2441 }
2442 
2443 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
2444   HandleCondition(comp);
2445 }
2446 
2447 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2448   HandleCondition(comp);
2449 }
2450 
2451 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2452   HandleCondition(comp);
2453 }
2454 
2455 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
2456   HandleCondition(comp);
2457 }
2458 
2459 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
2460   HandleCondition(comp);
2461 }
2462 
2463 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2464   HandleCondition(comp);
2465 }
2466 
2467 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2468   HandleCondition(comp);
2469 }
2470 
2471 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
2472   HandleCondition(comp);
2473 }
2474 
2475 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
2476   HandleCondition(comp);
2477 }
2478 
2479 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2480   HandleCondition(comp);
2481 }
2482 
2483 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2484   HandleCondition(comp);
2485 }
2486 
2487 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
2488   HandleCondition(comp);
2489 }
2490 
2491 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
2492   HandleCondition(comp);
2493 }
2494 
2495 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2496   HandleCondition(comp);
2497 }
2498 
2499 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2500   HandleCondition(comp);
2501 }
2502 
2503 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
2504   LocationSummary* locations =
2505       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2506   locations->SetOut(Location::ConstantLocation(constant));
2507 }
2508 
2509 void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2510   // Will be generated at use site.
2511 }
2512 
2513 void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
2514   LocationSummary* locations =
2515       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2516   locations->SetOut(Location::ConstantLocation(constant));
2517 }
2518 
2519 void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2520   // Will be generated at use site.
2521 }
2522 
2523 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
2524   LocationSummary* locations =
2525       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2526   locations->SetOut(Location::ConstantLocation(constant));
2527 }
2528 
2529 void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2530   // Will be generated at use site.
2531 }
2532 
2533 void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
2534   LocationSummary* locations =
2535       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2536   locations->SetOut(Location::ConstantLocation(constant));
2537 }
2538 
2539 void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2540   // Will be generated at use site.
2541 }
2542 
2543 void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
2544   LocationSummary* locations =
2545       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2546   locations->SetOut(Location::ConstantLocation(constant));
2547 }
2548 
2549 void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2550   // Will be generated at use site.
2551 }
2552 
2553 void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
2554   constructor_fence->SetLocations(nullptr);
2555 }
2556 
2557 void InstructionCodeGeneratorX86::VisitConstructorFence(
2558     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2559   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2560 }
2561 
2562 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2563   memory_barrier->SetLocations(nullptr);
2564 }
2565 
2566 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2567   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2568 }
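// Note (informational): on x86, GenerateMemoryBarrier (defined elsewhere in
// this code generator) only needs a real fence instruction for StoreLoad /
// kAnyAny ordering; store-store and acquire/release orderings already hold
// under the x86 TSO memory model, so a constructor fence or kStoreStore
// barrier is effectively free at run time. A hedged C++ analogy:
//
//   #include <atomic>
//   // A release fence compiles to no instruction on x86 (compiler barrier only).
//   void PublishFlag(std::atomic<int>& flag) {
//     std::atomic_thread_fence(std::memory_order_release);
//     flag.store(1, std::memory_order_relaxed);
//   }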
2569 
2570 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
2571   ret->SetLocations(nullptr);
2572 }
2573 
2574 void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2575   codegen_->GenerateFrameExit();
2576 }
2577 
2578 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
2579   LocationSummary* locations =
2580       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2581   SetInForReturnValue(ret, locations);
2582 }
2583 
2584 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
2585   switch (ret->InputAt(0)->GetType()) {
2586     case DataType::Type::kReference:
2587     case DataType::Type::kBool:
2588     case DataType::Type::kUint8:
2589     case DataType::Type::kInt8:
2590     case DataType::Type::kUint16:
2591     case DataType::Type::kInt16:
2592     case DataType::Type::kInt32:
2593       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
2594       break;
2595 
2596     case DataType::Type::kInt64:
2597       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX);
2598       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX);
2599       break;
2600 
2601     case DataType::Type::kFloat32:
2602       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2603       if (GetGraph()->IsCompilingOsr()) {
2604         // To simplify callers of an OSR method, we put the return value in both
2605         // floating point and core registers.
2606         __ movd(EAX, XMM0);
2607       }
2608       break;
2609 
2610     case DataType::Type::kFloat64:
2611       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2612       if (GetGraph()->IsCompilingOsr()) {
2613         // To simplify callers of an OSR method, we put the return value in both
2614         // floating point and core registers.
2615         __ movd(EAX, XMM0);
2616         // Use XMM1 as temporary register to not clobber XMM0.
2617         __ movaps(XMM1, XMM0);
2618         __ psrlq(XMM1, Immediate(32));
2619         __ movd(EDX, XMM1);
2620       }
2621       break;
2622 
2623     default:
2624       LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2625   }
2626   codegen_->GenerateFrameExit();
2627 }
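// Informational sketch for the OSR paths above: a double returned in XMM0 is
// also mirrored into the EDX:EAX register pair, and the movd/psrlq/movd
// sequence is simply a bit-level split of the 64-bit value. Roughly (helper
// name is illustrative, not part of this file):
//
//   #include <cstdint>
//   #include <cstring>
//   void SplitDoubleForOsr(double value, uint32_t* eax, uint32_t* edx) {
//     uint64_t bits;
//     std::memcpy(&bits, &value, sizeof(bits));   // reinterpret, no conversion
//     *eax = static_cast<uint32_t>(bits);         // movd EAX, XMM0
//     *edx = static_cast<uint32_t>(bits >> 32);   // psrlq XMM1, 32; movd EDX, XMM1
//   }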
2628 
2629 void LocationsBuilderX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2630   // The trampoline uses the same calling convention as dex calling conventions,
2631   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2632   // the method_idx.
2633   HandleInvoke(invoke);
2634 }
2635 
2636 void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2637   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2638 }
2639 
2640 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2641   // Explicit clinit checks triggered by static invokes must have been pruned by
2642   // art::PrepareForRegisterAllocation.
2643   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2644 
2645   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2646   if (intrinsic.TryDispatch(invoke)) {
2647     if (invoke->GetLocations()->CanCall() &&
2648         invoke->HasPcRelativeMethodLoadKind() &&
2649         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) {
2650       invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
2651     }
2652     return;
2653   }
2654 
2655   if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
2656     CriticalNativeCallingConventionVisitorX86 calling_convention_visitor(
2657         /*for_register_allocation=*/ true);
2658     CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2659   } else {
2660     HandleInvoke(invoke);
2661   }
2662 
2663   // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
2664   if (invoke->HasPcRelativeMethodLoadKind()) {
2665     invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2666   }
2667 }
2668 
2669 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
2670   if (invoke->GetLocations()->Intrinsified()) {
2671     IntrinsicCodeGeneratorX86 intrinsic(codegen);
2672     intrinsic.Dispatch(invoke);
2673     return true;
2674   }
2675   return false;
2676 }
2677 
2678 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2679   // Explicit clinit checks triggered by static invokes must have been pruned by
2680   // art::PrepareForRegisterAllocation.
2681   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2682 
2683   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2684     return;
2685   }
2686 
2687   LocationSummary* locations = invoke->GetLocations();
2688   codegen_->GenerateStaticOrDirectCall(
2689       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2690 }
2691 
2692 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2693   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2694   if (intrinsic.TryDispatch(invoke)) {
2695     return;
2696   }
2697 
2698   HandleInvoke(invoke);
2699 
2700   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2701     // Add one temporary for inline cache update.
2702     invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2703   }
2704 }
2705 
2706 void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
2707   InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
2708   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2709 }
2710 
2711 void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2712   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2713     return;
2714   }
2715 
2716   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2717   DCHECK(!codegen_->IsLeafMethod());
2718 }
2719 
2720 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2721   // This call to HandleInvoke allocates a temporary (core) register
2722   // which is also used to transfer the hidden argument from FP to
2723   // core register.
2724   HandleInvoke(invoke);
2725   // Add the hidden argument.
2726   invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
2727 
2728   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2729     // Add one temporary for inline cache update.
2730     invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2731   }
2732 
2733   // For PC-relative load kinds the invoke has an extra input, the PC-relative address base.
2734   if (IsPcRelativeMethodLoadKind(invoke->GetHiddenArgumentLoadKind())) {
2735     invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2736   }
2737 
2738   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2739     invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
2740                                     Location::RequiresRegister());
2741   }
2742 }
2743 
2744 void CodeGeneratorX86::MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass) {
2745   DCHECK_EQ(EAX, klass);
2746   // We know the destination of an intrinsic, so no need to record inline
2747   // caches (also the intrinsic location builder doesn't request an additional
2748   // temporary).
2749   if (!instruction->GetLocations()->Intrinsified() &&
2750       GetGraph()->IsCompilingBaseline() &&
2751       !Runtime::Current()->IsAotCompiler()) {
2752     DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
2753     ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2754     DCHECK(info != nullptr);
2755     InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
2756     uint32_t address = reinterpret_cast32<uint32_t>(cache);
2757     if (kIsDebugBuild) {
2758       uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u;
2759       CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>());
2760     }
2761     Register temp = EBP;
2762     NearLabel done;
2763     __ movl(temp, Immediate(address));
2764     // Fast path for a monomorphic cache.
2765     __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value()));
2766     __ j(kEqual, &done);
2767     GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value());
2768     __ Bind(&done);
2769   }
2770 }
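// Informational sketch of the baseline inline-cache fast path emitted above
// (names are descriptive pseudocode, not the exact InlineCache layout):
//
//   // cache = profiling_info->GetInlineCache(dex_pc), baked in as an immediate
//   if (receiver_klass != cache->classes_[0]) {
//     art_quick_update_inline_cache();  // runtime records the observed class
//   }
//   // hit: fall through to the call with no extra work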
2771 
2772 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2773   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2774   LocationSummary* locations = invoke->GetLocations();
2775   Register temp = locations->GetTemp(0).AsRegister<Register>();
2776   XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2777   Location receiver = locations->InAt(0);
2778   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2779 
2780   // Set the hidden argument. It is safe to do this here, as XMM7
2781   // won't be modified thereafter, before the `call` instruction.
2782   DCHECK_EQ(XMM7, hidden_reg);
2783   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2784     __ movd(hidden_reg, locations->InAt(invoke->GetNumberOfArguments() - 1).AsRegister<Register>());
2785   } else if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
2786     codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), locations->GetTemp(0), invoke);
2787     __ movd(hidden_reg, temp);
2788   }
2789 
2790   if (receiver.IsStackSlot()) {
2791     __ movl(temp, Address(ESP, receiver.GetStackIndex()));
2792     // /* HeapReference<Class> */ temp = temp->klass_
2793     __ movl(temp, Address(temp, class_offset));
2794   } else {
2795     // /* HeapReference<Class> */ temp = receiver->klass_
2796     __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
2797   }
2798   codegen_->MaybeRecordImplicitNullCheck(invoke);
2799   // Instead of simply (possibly) unpoisoning `temp` here, we should
2800   // emit a read barrier for the previous class reference load.
2801   // However this is not required in practice, as this is an
2802   // intermediate/temporary reference and because the current
2803   // concurrent copying collector keeps the from-space memory
2804   // intact/accessible until the end of the marking phase (the
2805       // concurrent copying collector may not do so in the future).
2806   __ MaybeUnpoisonHeapReference(temp);
2807 
2808   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2809 
2810   // temp = temp->GetAddressOfIMT()
2811   __ movl(temp,
2812       Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
2813   // temp = temp->GetImtEntryAt(method_offset);
2814   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2815       invoke->GetImtIndex(), kX86PointerSize));
2816   __ movl(temp, Address(temp, method_offset));
2817   if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
2818     // We pass the method from the IMT in case of a conflict. This will ensure
2819     // we go into the runtime to resolve the actual method.
2820     __ movd(hidden_reg, temp);
2821   }
2822   // call temp->GetEntryPoint();
2823   __ call(Address(temp,
2824                   ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
2825 
2826   DCHECK(!codegen_->IsLeafMethod());
2827   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2828 }
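// Informational summary of the interface dispatch emitted above, as C-like
// pseudocode (field names are descriptive, not the exact runtime layout):
//
//   klass  = receiver->klass_;                          // + unpoison/implicit null check
//   imt    = klass->imt_;                               // ImtPtrOffset
//   method = imt[ImTable::OffsetOfElement(imt_index)];
//   if (hidden_arg_load_kind == kRuntimeCall) {
//     xmm7 = method;  // on a conflict, the stub uses this to find the real target
//   }
//   (method->entry_point_from_quick_compiled_code_)();  // the final call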
2829 
2830 void LocationsBuilderX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2831   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2832   if (intrinsic.TryDispatch(invoke)) {
2833     return;
2834   }
2835   HandleInvoke(invoke);
2836 }
2837 
2838 void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2839   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2840     return;
2841   }
2842   codegen_->GenerateInvokePolymorphicCall(invoke);
2843 }
2844 
2845 void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2846   HandleInvoke(invoke);
2847 }
2848 
2849 void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2850   codegen_->GenerateInvokeCustomCall(invoke);
2851 }
2852 
2853 void LocationsBuilderX86::VisitNeg(HNeg* neg) {
2854   LocationSummary* locations =
2855       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2856   switch (neg->GetResultType()) {
2857     case DataType::Type::kInt32:
2858     case DataType::Type::kInt64:
2859       locations->SetInAt(0, Location::RequiresRegister());
2860       locations->SetOut(Location::SameAsFirstInput());
2861       break;
2862 
2863     case DataType::Type::kFloat32:
2864       locations->SetInAt(0, Location::RequiresFpuRegister());
2865       locations->SetOut(Location::SameAsFirstInput());
2866       locations->AddTemp(Location::RequiresRegister());
2867       locations->AddTemp(Location::RequiresFpuRegister());
2868       break;
2869 
2870     case DataType::Type::kFloat64:
2871       locations->SetInAt(0, Location::RequiresFpuRegister());
2872       locations->SetOut(Location::SameAsFirstInput());
2873       locations->AddTemp(Location::RequiresFpuRegister());
2874       break;
2875 
2876     default:
2877       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2878   }
2879 }
2880 
2881 void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
2882   LocationSummary* locations = neg->GetLocations();
2883   Location out = locations->Out();
2884   Location in = locations->InAt(0);
2885   switch (neg->GetResultType()) {
2886     case DataType::Type::kInt32:
2887       DCHECK(in.IsRegister());
2888       DCHECK(in.Equals(out));
2889       __ negl(out.AsRegister<Register>());
2890       break;
2891 
2892     case DataType::Type::kInt64:
2893       DCHECK(in.IsRegisterPair());
2894       DCHECK(in.Equals(out));
2895       __ negl(out.AsRegisterPairLow<Register>());
2896       // Negation is similar to subtraction from zero.  The least
2897       // significant 32 bits trigger a borrow when they are different from
2898       // zero; to take it into account, add 1 to the most significant
2899       // 32 bits if the carry flag (CF) is set to 1 after the first NEGL
2900       // operation.
2901       __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0));
2902       __ negl(out.AsRegisterPairHigh<Register>());
2903       break;
2904 
2905     case DataType::Type::kFloat32: {
2906       DCHECK(in.Equals(out));
2907       Register constant = locations->GetTemp(0).AsRegister<Register>();
2908       XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2909       // Implement float negation with an exclusive or with value
2910       // 0x80000000 (mask for bit 31, representing the sign of a
2911       // single-precision floating-point number).
2912       __ movl(constant, Immediate(INT32_C(0x80000000)));
2913       __ movd(mask, constant);
2914       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2915       break;
2916     }
2917 
2918     case DataType::Type::kFloat64: {
2919       DCHECK(in.Equals(out));
2920       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2921       // Implement double negation with an exclusive or with value
2922       // 0x8000000000000000 (mask for bit 63, representing the sign of
2923       // a double-precision floating-point number).
2924       __ LoadLongConstant(mask, INT64_C(0x8000000000000000));
2925       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2926       break;
2927     }
2928 
2929     default:
2930       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2931   }
2932 }
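// Informational note on the kInt64 case above: negl/adcl/negl computes the
// two's complement of a value held in a register pair. A small C sketch of the
// same arithmetic (function name is illustrative, not part of this file):
//
//   #include <cstdint>
//   void NegateInt64(uint32_t* lo, uint32_t* hi) {
//     uint32_t borrow = (*lo != 0u) ? 1u : 0u;  // CF left by negl on the low half
//     *lo = 0u - *lo;                           // negl low
//     *hi = 0u - (*hi + borrow);                // adcl high, 0  then  negl high
//   }
//
// The float/double cases below it just flip the IEEE-754 sign bit with an XOR
// mask (0x80000000 / 0x8000000000000000), which also negates zeros and NaNs.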
2933 
2934 void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
2935   LocationSummary* locations =
2936       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2937   DCHECK(DataType::IsFloatingPointType(neg->GetType()));
2938   locations->SetInAt(0, Location::RequiresFpuRegister());
2939   locations->SetInAt(1, Location::RequiresRegister());
2940   locations->SetOut(Location::SameAsFirstInput());
2941   locations->AddTemp(Location::RequiresFpuRegister());
2942 }
2943 
2944 void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
2945   LocationSummary* locations = neg->GetLocations();
2946   Location out = locations->Out();
2947   DCHECK(locations->InAt(0).Equals(out));
2948 
2949   Register constant_area = locations->InAt(1).AsRegister<Register>();
2950   XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2951   if (neg->GetType() == DataType::Type::kFloat32) {
2952     __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000),
2953                                                  neg->GetBaseMethodAddress(),
2954                                                  constant_area));
2955     __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2956   } else {
2957      __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
2958                                                   neg->GetBaseMethodAddress(),
2959                                                   constant_area));
2960      __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2961   }
2962 }
2963 
2964 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
2965   DataType::Type result_type = conversion->GetResultType();
2966   DataType::Type input_type = conversion->GetInputType();
2967   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2968       << input_type << " -> " << result_type;
2969 
2970   // The float-to-long and double-to-long type conversions rely on a
2971   // call to the runtime.
2972   LocationSummary::CallKind call_kind =
2973       ((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
2974        && result_type == DataType::Type::kInt64)
2975       ? LocationSummary::kCallOnMainOnly
2976       : LocationSummary::kNoCall;
2977   LocationSummary* locations =
2978       new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
2979 
2980   switch (result_type) {
2981     case DataType::Type::kUint8:
2982     case DataType::Type::kInt8:
2983       switch (input_type) {
2984         case DataType::Type::kUint8:
2985         case DataType::Type::kInt8:
2986         case DataType::Type::kUint16:
2987         case DataType::Type::kInt16:
2988         case DataType::Type::kInt32:
2989           locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
2990           // Make the output overlap to please the register allocator. This greatly simplifies
2991           // the validation of the linear scan implementation
2992           locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2993           break;
2994         case DataType::Type::kInt64: {
2995           HInstruction* input = conversion->InputAt(0);
2996           Location input_location = input->IsConstant()
2997               ? Location::ConstantLocation(input)
2998               : Location::RegisterPairLocation(EAX, EDX);
2999           locations->SetInAt(0, input_location);
3000           // Make the output overlap to please the register allocator. This greatly simplifies
3001           // the validation of the linear scan implementation
3002           locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3003           break;
3004         }
3005 
3006         default:
3007           LOG(FATAL) << "Unexpected type conversion from " << input_type
3008                      << " to " << result_type;
3009       }
3010       break;
3011 
3012     case DataType::Type::kUint16:
3013     case DataType::Type::kInt16:
3014       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3015       locations->SetInAt(0, Location::Any());
3016       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3017       break;
3018 
3019     case DataType::Type::kInt32:
3020       switch (input_type) {
3021         case DataType::Type::kInt64:
3022           locations->SetInAt(0, Location::Any());
3023           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3024           break;
3025 
3026         case DataType::Type::kFloat32:
3027           locations->SetInAt(0, Location::RequiresFpuRegister());
3028           locations->SetOut(Location::RequiresRegister());
3029           locations->AddTemp(Location::RequiresFpuRegister());
3030           break;
3031 
3032         case DataType::Type::kFloat64:
3033           locations->SetInAt(0, Location::RequiresFpuRegister());
3034           locations->SetOut(Location::RequiresRegister());
3035           locations->AddTemp(Location::RequiresFpuRegister());
3036           break;
3037 
3038         default:
3039           LOG(FATAL) << "Unexpected type conversion from " << input_type
3040                      << " to " << result_type;
3041       }
3042       break;
3043 
3044     case DataType::Type::kInt64:
3045       switch (input_type) {
3046         case DataType::Type::kBool:
3047         case DataType::Type::kUint8:
3048         case DataType::Type::kInt8:
3049         case DataType::Type::kUint16:
3050         case DataType::Type::kInt16:
3051         case DataType::Type::kInt32:
3052           locations->SetInAt(0, Location::RegisterLocation(EAX));
3053           locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3054           break;
3055 
3056         case DataType::Type::kFloat32:
3057         case DataType::Type::kFloat64: {
3058           InvokeRuntimeCallingConvention calling_convention;
3059           XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
3060           locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
3061 
3062           // The runtime helper puts the result in EAX, EDX.
3063           locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3064         }
3065         break;
3066 
3067         default:
3068           LOG(FATAL) << "Unexpected type conversion from " << input_type
3069                      << " to " << result_type;
3070       }
3071       break;
3072 
3073     case DataType::Type::kFloat32:
3074       switch (input_type) {
3075         case DataType::Type::kBool:
3076         case DataType::Type::kUint8:
3077         case DataType::Type::kInt8:
3078         case DataType::Type::kUint16:
3079         case DataType::Type::kInt16:
3080         case DataType::Type::kInt32:
3081           locations->SetInAt(0, Location::RequiresRegister());
3082           locations->SetOut(Location::RequiresFpuRegister());
3083           break;
3084 
3085         case DataType::Type::kInt64:
3086           locations->SetInAt(0, Location::Any());
3087           locations->SetOut(Location::Any());
3088           break;
3089 
3090         case DataType::Type::kFloat64:
3091           locations->SetInAt(0, Location::RequiresFpuRegister());
3092           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3093           break;
3094 
3095         default:
3096           LOG(FATAL) << "Unexpected type conversion from " << input_type
3097                      << " to " << result_type;
3098       }
3099       break;
3100 
3101     case DataType::Type::kFloat64:
3102       switch (input_type) {
3103         case DataType::Type::kBool:
3104         case DataType::Type::kUint8:
3105         case DataType::Type::kInt8:
3106         case DataType::Type::kUint16:
3107         case DataType::Type::kInt16:
3108         case DataType::Type::kInt32:
3109           locations->SetInAt(0, Location::RequiresRegister());
3110           locations->SetOut(Location::RequiresFpuRegister());
3111           break;
3112 
3113         case DataType::Type::kInt64:
3114           locations->SetInAt(0, Location::Any());
3115           locations->SetOut(Location::Any());
3116           break;
3117 
3118         case DataType::Type::kFloat32:
3119           locations->SetInAt(0, Location::RequiresFpuRegister());
3120           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3121           break;
3122 
3123         default:
3124           LOG(FATAL) << "Unexpected type conversion from " << input_type
3125                      << " to " << result_type;
3126       }
3127       break;
3128 
3129     default:
3130       LOG(FATAL) << "Unexpected type conversion from " << input_type
3131                  << " to " << result_type;
3132   }
3133 }
3134 
3135 void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversion) {
3136   LocationSummary* locations = conversion->GetLocations();
3137   Location out = locations->Out();
3138   Location in = locations->InAt(0);
3139   DataType::Type result_type = conversion->GetResultType();
3140   DataType::Type input_type = conversion->GetInputType();
3141   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3142       << input_type << " -> " << result_type;
3143   switch (result_type) {
3144     case DataType::Type::kUint8:
3145       switch (input_type) {
3146         case DataType::Type::kInt8:
3147         case DataType::Type::kUint16:
3148         case DataType::Type::kInt16:
3149         case DataType::Type::kInt32:
3150           if (in.IsRegister()) {
3151             __ movzxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
3152           } else {
3153             DCHECK(in.GetConstant()->IsIntConstant());
3154             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3155             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
3156           }
3157           break;
3158         case DataType::Type::kInt64:
3159           if (in.IsRegisterPair()) {
3160             __ movzxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
3161           } else {
3162             DCHECK(in.GetConstant()->IsLongConstant());
3163             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3164             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
3165           }
3166           break;
3167 
3168         default:
3169           LOG(FATAL) << "Unexpected type conversion from " << input_type
3170                      << " to " << result_type;
3171       }
3172       break;
3173 
3174     case DataType::Type::kInt8:
3175       switch (input_type) {
3176         case DataType::Type::kUint8:
3177         case DataType::Type::kUint16:
3178         case DataType::Type::kInt16:
3179         case DataType::Type::kInt32:
3180           if (in.IsRegister()) {
3181             __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
3182           } else {
3183             DCHECK(in.GetConstant()->IsIntConstant());
3184             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3185             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
3186           }
3187           break;
3188         case DataType::Type::kInt64:
3189           if (in.IsRegisterPair()) {
3190             __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
3191           } else {
3192             DCHECK(in.GetConstant()->IsLongConstant());
3193             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3194             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
3195           }
3196           break;
3197 
3198         default:
3199           LOG(FATAL) << "Unexpected type conversion from " << input_type
3200                      << " to " << result_type;
3201       }
3202       break;
3203 
3204     case DataType::Type::kUint16:
3205       switch (input_type) {
3206         case DataType::Type::kInt8:
3207         case DataType::Type::kInt16:
3208         case DataType::Type::kInt32:
3209           if (in.IsRegister()) {
3210             __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
3211           } else if (in.IsStackSlot()) {
3212             __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3213           } else {
3214             DCHECK(in.GetConstant()->IsIntConstant());
3215             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3216             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
3217           }
3218           break;
3219         case DataType::Type::kInt64:
3220           if (in.IsRegisterPair()) {
3221             __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3222           } else if (in.IsDoubleStackSlot()) {
3223             __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3224           } else {
3225             DCHECK(in.GetConstant()->IsLongConstant());
3226             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3227             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
3228           }
3229           break;
3230 
3231         default:
3232           LOG(FATAL) << "Unexpected type conversion from " << input_type
3233                      << " to " << result_type;
3234       }
3235       break;
3236 
3237     case DataType::Type::kInt16:
3238       switch (input_type) {
3239         case DataType::Type::kUint16:
3240         case DataType::Type::kInt32:
3241           if (in.IsRegister()) {
3242             __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
3243           } else if (in.IsStackSlot()) {
3244             __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3245           } else {
3246             DCHECK(in.GetConstant()->IsIntConstant());
3247             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
3248             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
3249           }
3250           break;
3251         case DataType::Type::kInt64:
3252           if (in.IsRegisterPair()) {
3253             __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3254           } else if (in.IsDoubleStackSlot()) {
3255             __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3256           } else {
3257             DCHECK(in.GetConstant()->IsLongConstant());
3258             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3259             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
3260           }
3261           break;
3262 
3263         default:
3264           LOG(FATAL) << "Unexpected type conversion from " << input_type
3265                      << " to " << result_type;
3266       }
3267       break;
3268 
3269     case DataType::Type::kInt32:
3270       switch (input_type) {
3271         case DataType::Type::kInt64:
3272           if (in.IsRegisterPair()) {
3273             __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
3274           } else if (in.IsDoubleStackSlot()) {
3275             __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
3276           } else {
3277             DCHECK(in.IsConstant());
3278             DCHECK(in.GetConstant()->IsLongConstant());
3279             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3280             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value)));
3281           }
3282           break;
3283 
3284         case DataType::Type::kFloat32: {
3285           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3286           Register output = out.AsRegister<Register>();
3287           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3288           NearLabel done, nan;
3289 
3290           __ movl(output, Immediate(kPrimIntMax));
3291           // temp = int-to-float(output)
3292           __ cvtsi2ss(temp, output);
3293           // if input >= temp goto done
3294           __ comiss(input, temp);
3295           __ j(kAboveEqual, &done);
3296           // if input == NaN goto nan
3297           __ j(kUnordered, &nan);
3298           // output = float-to-int-truncate(input)
3299           __ cvttss2si(output, input);
3300           __ jmp(&done);
3301           __ Bind(&nan);
3302           //  output = 0
3303           __ xorl(output, output);
3304           __ Bind(&done);
3305           break;
3306         }
3307 
3308         case DataType::Type::kFloat64: {
3309           XmmRegister input = in.AsFpuRegister<XmmRegister>();
3310           Register output = out.AsRegister<Register>();
3311           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3312           NearLabel done, nan;
3313 
3314           __ movl(output, Immediate(kPrimIntMax));
3315           // temp = int-to-double(output)
3316           __ cvtsi2sd(temp, output);
3317           // if input >= temp goto done
3318           __ comisd(input, temp);
3319           __ j(kAboveEqual, &done);
3320           // if input == NaN goto nan
3321           __ j(kUnordered, &nan);
3322           // output = double-to-int-truncate(input)
3323           __ cvttsd2si(output, input);
3324           __ jmp(&done);
3325           __ Bind(&nan);
3326           //  output = 0
3327           __ xorl(output, output);
3328           __ Bind(&done);
3329           break;
3330         }
3331 
3332         default:
3333           LOG(FATAL) << "Unexpected type conversion from " << input_type
3334                      << " to " << result_type;
3335       }
3336       break;
3337 
3338     case DataType::Type::kInt64:
3339       switch (input_type) {
3340         case DataType::Type::kBool:
3341         case DataType::Type::kUint8:
3342         case DataType::Type::kInt8:
3343         case DataType::Type::kUint16:
3344         case DataType::Type::kInt16:
3345         case DataType::Type::kInt32:
3346           DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
3347           DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
3348           DCHECK_EQ(in.AsRegister<Register>(), EAX);
3349           __ cdq();
3350           break;
3351 
3352         case DataType::Type::kFloat32:
3353           codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
3354           CheckEntrypointTypes<kQuickF2l, int64_t, float>();
3355           break;
3356 
3357         case DataType::Type::kFloat64:
3358           codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
3359           CheckEntrypointTypes<kQuickD2l, int64_t, double>();
3360           break;
3361 
3362         default:
3363           LOG(FATAL) << "Unexpected type conversion from " << input_type
3364                      << " to " << result_type;
3365       }
3366       break;
3367 
3368     case DataType::Type::kFloat32:
3369       switch (input_type) {
3370         case DataType::Type::kBool:
3371         case DataType::Type::kUint8:
3372         case DataType::Type::kInt8:
3373         case DataType::Type::kUint16:
3374         case DataType::Type::kInt16:
3375         case DataType::Type::kInt32:
3376           __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3377           break;
3378 
3379         case DataType::Type::kInt64: {
3380           size_t adjustment = 0;
3381 
3382           // Create stack space for the call to
3383           // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
3384           // TODO: enhance register allocator to ask for stack temporaries.
3385           if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
3386             adjustment = DataType::Size(DataType::Type::kInt64);
3387             codegen_->IncreaseFrame(adjustment);
3388           }
3389 
3390           // Load the value to the FP stack, using temporaries if needed.
3391           PushOntoFPStack(in, 0, adjustment, false, true);
3392 
3393           if (out.IsStackSlot()) {
3394             __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
3395           } else {
3396             __ fstps(Address(ESP, 0));
3397             Location stack_temp = Location::StackSlot(0);
3398             codegen_->Move32(out, stack_temp);
3399           }
3400 
3401           // Remove the temporary stack space we allocated.
3402           if (adjustment != 0) {
3403             codegen_->DecreaseFrame(adjustment);
3404           }
3405           break;
3406         }
3407 
3408         case DataType::Type::kFloat64:
3409           __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3410           break;
3411 
3412         default:
3413           LOG(FATAL) << "Unexpected type conversion from " << input_type
3414                      << " to " << result_type;
3415       }
3416       break;
3417 
3418     case DataType::Type::kFloat64:
3419       switch (input_type) {
3420         case DataType::Type::kBool:
3421         case DataType::Type::kUint8:
3422         case DataType::Type::kInt8:
3423         case DataType::Type::kUint16:
3424         case DataType::Type::kInt16:
3425         case DataType::Type::kInt32:
3426           __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3427           break;
3428 
3429         case DataType::Type::kInt64: {
3430           size_t adjustment = 0;
3431 
3432           // Create stack space for the call to
3433           // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
3434           // TODO: enhance register allocator to ask for stack temporaries.
3435           if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
3436             adjustment = DataType::Size(DataType::Type::kInt64);
3437             codegen_->IncreaseFrame(adjustment);
3438           }
3439 
3440           // Load the value to the FP stack, using temporaries if needed.
3441           PushOntoFPStack(in, 0, adjustment, false, true);
3442 
3443           if (out.IsDoubleStackSlot()) {
3444             __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
3445           } else {
3446             __ fstpl(Address(ESP, 0));
3447             Location stack_temp = Location::DoubleStackSlot(0);
3448             codegen_->Move64(out, stack_temp);
3449           }
3450 
3451           // Remove the temporary stack space we allocated.
3452           if (adjustment != 0) {
3453             codegen_->DecreaseFrame(adjustment);
3454           }
3455           break;
3456         }
3457 
3458         case DataType::Type::kFloat32:
3459           __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3460           break;
3461 
3462         default:
3463           LOG(FATAL) << "Unexpected type conversion from " << input_type
3464                      << " to " << result_type;
3465       }
3466       break;
3467 
3468     default:
3469       LOG(FATAL) << "Unexpected type conversion from " << input_type
3470                  << " to " << result_type;
3471   }
3472 }
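// Informational note on the float/double -> int32 paths above: the extra
// comparison and NaN check implement Java's saturating narrowing, which a bare
// cvttss2si/cvttsd2si would not (those return the 0x80000000 "integer
// indefinite" value for NaN and out-of-range inputs). A C++ sketch of the
// intended semantics (function name is illustrative, not part of this file):
//
//   #include <cmath>
//   #include <cstdint>
//   int32_t JavaFloatToInt(float input) {
//     if (std::isnan(input)) return 0;                              // NaN -> 0
//     if (input >= static_cast<float>(INT32_MAX)) return INT32_MAX;
//     if (input <= static_cast<float>(INT32_MIN)) return INT32_MIN;
//     return static_cast<int32_t>(input);                           // truncate toward zero
//   }
//
// Only the NaN and upper-bound cases need explicit code in the emitted
// sequence, because the truncating conversion instruction already yields
// INT32_MIN (0x80000000) for inputs at or below the lower bound.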
3473 
3474 void LocationsBuilderX86::VisitAdd(HAdd* add) {
3475   LocationSummary* locations =
3476       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3477   switch (add->GetResultType()) {
3478     case DataType::Type::kInt32: {
3479       locations->SetInAt(0, Location::RequiresRegister());
3480       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3481       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3482       break;
3483     }
3484 
3485     case DataType::Type::kInt64: {
3486       locations->SetInAt(0, Location::RequiresRegister());
3487       locations->SetInAt(1, Location::Any());
3488       locations->SetOut(Location::SameAsFirstInput());
3489       break;
3490     }
3491 
3492     case DataType::Type::kFloat32:
3493     case DataType::Type::kFloat64: {
3494       locations->SetInAt(0, Location::RequiresFpuRegister());
3495       if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3496         DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
3497       } else if (add->InputAt(1)->IsConstant()) {
3498         locations->SetInAt(1, Location::RequiresFpuRegister());
3499       } else {
3500         locations->SetInAt(1, Location::Any());
3501       }
3502       locations->SetOut(Location::SameAsFirstInput());
3503       break;
3504     }
3505 
3506     default:
3507       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3508       UNREACHABLE();
3509   }
3510 }
3511 
3512 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
3513   LocationSummary* locations = add->GetLocations();
3514   Location first = locations->InAt(0);
3515   Location second = locations->InAt(1);
3516   Location out = locations->Out();
3517 
3518   switch (add->GetResultType()) {
3519     case DataType::Type::kInt32: {
3520       if (second.IsRegister()) {
3521         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3522           __ addl(out.AsRegister<Register>(), second.AsRegister<Register>());
3523         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3524           __ addl(out.AsRegister<Register>(), first.AsRegister<Register>());
3525         } else {
3526           __ leal(out.AsRegister<Register>(), Address(
3527               first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0));
3528         }
3529       } else if (second.IsConstant()) {
3530         int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
3531         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3532           __ addl(out.AsRegister<Register>(), Immediate(value));
3533         } else {
3534           __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value));
3535         }
3536       } else {
3537         DCHECK(first.Equals(locations->Out()));
3538         __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3539       }
3540       break;
3541     }
3542 
3543     case DataType::Type::kInt64: {
3544       if (second.IsRegisterPair()) {
3545         __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3546         __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3547       } else if (second.IsDoubleStackSlot()) {
3548         __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3549         __ adcl(first.AsRegisterPairHigh<Register>(),
3550                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3551       } else {
3552         DCHECK(second.IsConstant()) << second;
3553         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3554         __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3555         __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3556       }
3557       break;
3558     }
3559 
3560     case DataType::Type::kFloat32: {
3561       if (second.IsFpuRegister()) {
3562         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3563       } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3564         HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3565         DCHECK(const_area->IsEmittedAtUseSite());
3566         __ addss(first.AsFpuRegister<XmmRegister>(),
3567                  codegen_->LiteralFloatAddress(
3568                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
3569                      const_area->GetBaseMethodAddress(),
3570                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3571       } else {
3572         DCHECK(second.IsStackSlot());
3573         __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3574       }
3575       break;
3576     }
3577 
3578     case DataType::Type::kFloat64: {
3579       if (second.IsFpuRegister()) {
3580         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3581       } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3582         HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3583         DCHECK(const_area->IsEmittedAtUseSite());
3584         __ addsd(first.AsFpuRegister<XmmRegister>(),
3585                  codegen_->LiteralDoubleAddress(
3586                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3587                      const_area->GetBaseMethodAddress(),
3588                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3589       } else {
3590         DCHECK(second.IsDoubleStackSlot());
3591         __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3592       }
3593       break;
3594     }
3595 
3596     default:
3597       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3598   }
3599 }
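// Informational note on the kInt32 case above: when the output register is
// distinct from both inputs, leal serves as a non-destructive three-operand
// add (and it leaves EFLAGS untouched). In assembler terms:
//
//   // out = first + second, without clobbering either input:
//   //   leal out, [first + second*1]
//   // versus the two-instruction form that ties out to first:
//   //   movl out, first
//   //   addl out, second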
3600 
3601 void LocationsBuilderX86::VisitSub(HSub* sub) {
3602   LocationSummary* locations =
3603       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3604   switch (sub->GetResultType()) {
3605     case DataType::Type::kInt32:
3606     case DataType::Type::kInt64: {
3607       locations->SetInAt(0, Location::RequiresRegister());
3608       locations->SetInAt(1, Location::Any());
3609       locations->SetOut(Location::SameAsFirstInput());
3610       break;
3611     }
3612     case DataType::Type::kFloat32:
3613     case DataType::Type::kFloat64: {
3614       locations->SetInAt(0, Location::RequiresFpuRegister());
3615       if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3616         DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
3617       } else if (sub->InputAt(1)->IsConstant()) {
3618         locations->SetInAt(1, Location::RequiresFpuRegister());
3619       } else {
3620         locations->SetInAt(1, Location::Any());
3621       }
3622       locations->SetOut(Location::SameAsFirstInput());
3623       break;
3624     }
3625 
3626     default:
3627       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3628   }
3629 }
3630 
3631 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
3632   LocationSummary* locations = sub->GetLocations();
3633   Location first = locations->InAt(0);
3634   Location second = locations->InAt(1);
3635   DCHECK(first.Equals(locations->Out()));
3636   switch (sub->GetResultType()) {
3637     case DataType::Type::kInt32: {
3638       if (second.IsRegister()) {
3639         __ subl(first.AsRegister<Register>(), second.AsRegister<Register>());
3640       } else if (second.IsConstant()) {
3641         __ subl(first.AsRegister<Register>(),
3642                 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3643       } else {
3644         __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3645       }
3646       break;
3647     }
3648 
3649     case DataType::Type::kInt64: {
3650       if (second.IsRegisterPair()) {
3651         __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3652         __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3653       } else if (second.IsDoubleStackSlot()) {
3654         __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3655         __ sbbl(first.AsRegisterPairHigh<Register>(),
3656                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3657       } else {
3658         DCHECK(second.IsConstant()) << second;
3659         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3660         __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3661         __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3662       }
3663       break;
3664     }
3665 
3666     case DataType::Type::kFloat32: {
3667       if (second.IsFpuRegister()) {
3668         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3669       } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3670         HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3671         DCHECK(const_area->IsEmittedAtUseSite());
3672         __ subss(first.AsFpuRegister<XmmRegister>(),
3673                  codegen_->LiteralFloatAddress(
3674                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
3675                      const_area->GetBaseMethodAddress(),
3676                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3677       } else {
3678         DCHECK(second.IsStackSlot());
3679         __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3680       }
3681       break;
3682     }
3683 
3684     case DataType::Type::kFloat64: {
3685       if (second.IsFpuRegister()) {
3686         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3687       } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3688         HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3689         DCHECK(const_area->IsEmittedAtUseSite());
3690         __ subsd(first.AsFpuRegister<XmmRegister>(),
3691                  codegen_->LiteralDoubleAddress(
3692                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3693                      const_area->GetBaseMethodAddress(),
3694                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3695       } else {
3696         DCHECK(second.IsDoubleStackSlot());
3697         __ subsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3698       }
3699       break;
3700     }
3701 
3702     default:
3703       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3704   }
3705 }
3706 
3707 void LocationsBuilderX86::VisitMul(HMul* mul) {
3708   LocationSummary* locations =
3709       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3710   switch (mul->GetResultType()) {
3711     case DataType::Type::kInt32:
3712       locations->SetInAt(0, Location::RequiresRegister());
3713       locations->SetInAt(1, Location::Any());
3714       if (mul->InputAt(1)->IsIntConstant()) {
3715         // Can use 3 operand multiply.
3716         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3717       } else {
3718         locations->SetOut(Location::SameAsFirstInput());
3719       }
3720       break;
3721     case DataType::Type::kInt64: {
3722       locations->SetInAt(0, Location::RequiresRegister());
3723       locations->SetInAt(1, Location::Any());
3724       locations->SetOut(Location::SameAsFirstInput());
3725       // Needed for imul on 32 bits with a 64-bit result.
3726       locations->AddTemp(Location::RegisterLocation(EAX));
3727       locations->AddTemp(Location::RegisterLocation(EDX));
3728       break;
3729     }
3730     case DataType::Type::kFloat32:
3731     case DataType::Type::kFloat64: {
3732       locations->SetInAt(0, Location::RequiresFpuRegister());
3733       if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3734         DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
3735       } else if (mul->InputAt(1)->IsConstant()) {
3736         locations->SetInAt(1, Location::RequiresFpuRegister());
3737       } else {
3738         locations->SetInAt(1, Location::Any());
3739       }
3740       locations->SetOut(Location::SameAsFirstInput());
3741       break;
3742     }
3743 
3744     default:
3745       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3746   }
3747 }
3748 
3749 void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
3750   LocationSummary* locations = mul->GetLocations();
3751   Location first = locations->InAt(0);
3752   Location second = locations->InAt(1);
3753   Location out = locations->Out();
3754 
3755   switch (mul->GetResultType()) {
3756     case DataType::Type::kInt32:
3757       // The constant may have ended up in a register, so test explicitly to avoid
3758       // problems where the output may not be the same as the first operand.
3759       if (mul->InputAt(1)->IsIntConstant()) {
3760         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3761         __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
3762       } else if (second.IsRegister()) {
3763         DCHECK(first.Equals(out));
3764         __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
3765       } else {
3766         DCHECK(second.IsStackSlot());
3767         DCHECK(first.Equals(out));
3768         __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3769       }
3770       break;
3771 
3772     case DataType::Type::kInt64: {
3773       Register in1_hi = first.AsRegisterPairHigh<Register>();
3774       Register in1_lo = first.AsRegisterPairLow<Register>();
3775       Register eax = locations->GetTemp(0).AsRegister<Register>();
3776       Register edx = locations->GetTemp(1).AsRegister<Register>();
3777 
3778       DCHECK_EQ(EAX, eax);
3779       DCHECK_EQ(EDX, edx);
3780 
3781       // input: in1 - 64 bits, in2 - 64 bits.
3782       // output: in1
3783       // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
3784       // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
3785       // parts: in1.lo = (in1.lo * in2.lo)[31:0]
3786       if (second.IsConstant()) {
3787         DCHECK(second.GetConstant()->IsLongConstant());
3788 
3789         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3790         int32_t low_value = Low32Bits(value);
3791         int32_t high_value = High32Bits(value);
3792         Immediate low(low_value);
3793         Immediate high(high_value);
3794 
3795         __ movl(eax, high);
3796         // eax <- in1.lo * in2.hi
3797         __ imull(eax, in1_lo);
3798         // in1.hi <- in1.hi * in2.lo
3799         __ imull(in1_hi, low);
3800         // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3801         __ addl(in1_hi, eax);
3802         // move in2_lo to eax to prepare for double precision
3803         __ movl(eax, low);
3804         // edx:eax <- in1.lo * in2.lo
3805         __ mull(in1_lo);
3806         // in1.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3807         __ addl(in1_hi, edx);
3808         // in1.lo <- (in1.lo * in2.lo)[31:0];
3809         __ movl(in1_lo, eax);
3810       } else if (second.IsRegisterPair()) {
3811         Register in2_hi = second.AsRegisterPairHigh<Register>();
3812         Register in2_lo = second.AsRegisterPairLow<Register>();
3813 
3814         __ movl(eax, in2_hi);
3815         // eax <- in1.lo * in2.hi
3816         __ imull(eax, in1_lo);
3817         // in1.hi <- in1.hi * in2.lo
3818         __ imull(in1_hi, in2_lo);
3819         // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3820         __ addl(in1_hi, eax);
3821         // move in1_lo to eax to prepare for double precision
3822         __ movl(eax, in1_lo);
3823         // edx:eax <- in1.lo * in2.lo
3824         __ mull(in2_lo);
3825         // in1.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3826         __ addl(in1_hi, edx);
3827         // in1.lo <- (in1.lo * in2.lo)[31:0];
3828         __ movl(in1_lo, eax);
3829       } else {
3830         DCHECK(second.IsDoubleStackSlot()) << second;
3831         Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
3832         Address in2_lo(ESP, second.GetStackIndex());
3833 
3834         __ movl(eax, in2_hi);
3835         // eax <- in1.lo * in2.hi
3836         __ imull(eax, in1_lo);
3837         // in1.hi <- in1.hi * in2.lo
3838         __ imull(in1_hi, in2_lo);
3839         // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3840         __ addl(in1_hi, eax);
3841         // move in1_lo to eax to prepare for double precision
3842         __ movl(eax, in1_lo);
3843         // edx:eax <- in1.lo * in2.lo
3844         __ mull(in2_lo);
3845         // in1.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3846         __ addl(in1_hi, edx);
3847         // in1.lo <- (in1.lo * in2.lo)[31:0];
3848         __ movl(in1_lo, eax);
3849       }
3850 
3851       break;
3852     }
3853 
3854     case DataType::Type::kFloat32: {
3855       DCHECK(first.Equals(locations->Out()));
3856       if (second.IsFpuRegister()) {
3857         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3858       } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3859         HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3860         DCHECK(const_area->IsEmittedAtUseSite());
3861         __ mulss(first.AsFpuRegister<XmmRegister>(),
3862                  codegen_->LiteralFloatAddress(
3863                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
3864                      const_area->GetBaseMethodAddress(),
3865                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3866       } else {
3867         DCHECK(second.IsStackSlot());
3868         __ mulss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3869       }
3870       break;
3871     }
3872 
3873     case DataType::Type::kFloat64: {
3874       DCHECK(first.Equals(locations->Out()));
3875       if (second.IsFpuRegister()) {
3876         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3877       } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3878         HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3879         DCHECK(const_area->IsEmittedAtUseSite());
3880         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3881                  codegen_->LiteralDoubleAddress(
3882                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3883                      const_area->GetBaseMethodAddress(),
3884                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3885       } else {
3886         DCHECK(second.IsDoubleStackSlot());
3887         __ mulsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3888       }
3889       break;
3890     }
3891 
3892     default:
3893       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3894   }
3895 }
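// Worked example of the 64-bit multiply decomposition above (illustrative only):
// for in1 = 0x0000000100000003 and in2 = 0x0000000200000005,
//   in1.lo * in2.lo = 3 * 5 = 15  -> low word 0x0000000F, high word 0
//   in1.lo * in2.hi = 3 * 2 = 6
//   in1.hi * in2.lo = 1 * 5 = 5
//   result.hi = 6 + 5 + 0 = 0xB, result.lo = 0xF, i.e. 0x0000000B0000000F,
// which is the full product truncated to 64 bits (the in1.hi * in2.hi term
// only affects bits >= 64 and is therefore dropped).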
3896 
3897 void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
3898                                                   uint32_t temp_offset,
3899                                                   uint32_t stack_adjustment,
3900                                                   bool is_fp,
3901                                                   bool is_wide) {
3902   if (source.IsStackSlot()) {
3903     DCHECK(!is_wide);
3904     if (is_fp) {
3905       __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3906     } else {
3907       __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3908     }
3909   } else if (source.IsDoubleStackSlot()) {
3910     DCHECK(is_wide);
3911     if (is_fp) {
3912       __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3913     } else {
3914       __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3915     }
3916   } else {
3917     // Write the value to the temporary location on the stack and load to FP stack.
3918     if (!is_wide) {
3919       Location stack_temp = Location::StackSlot(temp_offset);
3920       codegen_->Move32(stack_temp, source);
3921       if (is_fp) {
3922         __ flds(Address(ESP, temp_offset));
3923       } else {
3924         __ filds(Address(ESP, temp_offset));
3925       }
3926     } else {
3927       Location stack_temp = Location::DoubleStackSlot(temp_offset);
3928       codegen_->Move64(stack_temp, source);
3929       if (is_fp) {
3930         __ fldl(Address(ESP, temp_offset));
3931       } else {
3932         __ fildl(Address(ESP, temp_offset));
3933       }
3934     }
3935   }
3936 }
3937 
3938 void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
3939   DataType::Type type = rem->GetResultType();
3940   bool is_float = type == DataType::Type::kFloat32;
3941   size_t elem_size = DataType::Size(type);
3942   LocationSummary* locations = rem->GetLocations();
3943   Location first = locations->InAt(0);
3944   Location second = locations->InAt(1);
3945   Location out = locations->Out();
3946 
3947   // Create stack space for 2 elements.
3948   // TODO: enhance register allocator to ask for stack temporaries.
3949   codegen_->IncreaseFrame(2 * elem_size);
3950 
3951   // Load the values to the FP stack in reverse order, using temporaries if needed.
3952   const bool is_wide = !is_float;
3953   PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp= */ true, is_wide);
3954   PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp= */ true, is_wide);
3955 
3956   // Loop doing FPREM until we stabilize.
3957   NearLabel retry;
3958   __ Bind(&retry);
3959   __ fprem();
3960 
3961   // Move FP status to AX.
3962   __ fstsw();
3963 
3964   // And see if the argument reduction is complete. This is signaled by the
3965   // C2 FPU flag bit set to 0.
3966   __ andl(EAX, Immediate(kC2ConditionMask));
3967   __ j(kNotEqual, &retry);
3968 
3969   // We have settled on the final value. Retrieve it into an XMM register.
3970   // Store FP top of stack to real stack.
3971   if (is_float) {
3972     __ fsts(Address(ESP, 0));
3973   } else {
3974     __ fstl(Address(ESP, 0));
3975   }
3976 
3977   // Pop the 2 items from the FP stack.
3978   __ fucompp();
3979 
3980   // Load the value from the stack into an XMM register.
3981   DCHECK(out.IsFpuRegister()) << out;
3982   if (is_float) {
3983     __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3984   } else {
3985     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3986   }
3987 
3988   // And remove the temporary stack space we allocated.
3989   codegen_->DecreaseFrame(2 * elem_size);
3990 }
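// Note on the fprem loop above (illustrative): fprem computes a partial
// remainder using a quotient truncated toward zero, which matches the Java
// semantics of % for floats and doubles (the sign of the result follows the
// dividend, e.g. 5.5 % 2.0 == 1.5 and -5.5 % 2.0 == -1.5). When the exponent
// difference between the operands is large, a single fprem only performs a
// partial reduction and sets C2, so the loop retries until C2 is clear.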
3991 
3992 
3993 void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3994   DCHECK(instruction->IsDiv() || instruction->IsRem());
3995 
3996   LocationSummary* locations = instruction->GetLocations();
3997   DCHECK(locations->InAt(1).IsConstant());
3998   DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
3999 
4000   Register out_register = locations->Out().AsRegister<Register>();
4001   Register input_register = locations->InAt(0).AsRegister<Register>();
4002   int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
4003 
4004   DCHECK(imm == 1 || imm == -1);
4005 
4006   if (instruction->IsRem()) {
4007     __ xorl(out_register, out_register);
4008   } else {
4009     __ movl(out_register, input_register);
4010     if (imm == -1) {
4011       __ negl(out_register);
4012     }
4013   }
4014 }
4015 
4016 void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {
4017   LocationSummary* locations = instruction->GetLocations();
4018   Location second = locations->InAt(1);
4019 
4020   Register out = locations->Out().AsRegister<Register>();
4021   Register numerator = locations->InAt(0).AsRegister<Register>();
4022 
4023   int32_t imm = Int64FromConstant(second.GetConstant());
4024   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4025   uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
4026 
4027   Register tmp = locations->GetTemp(0).AsRegister<Register>();
4028   NearLabel done;
4029   __ movl(out, numerator);
4030   __ andl(out, Immediate(abs_imm-1));
4031   __ j(Condition::kZero, &done);
4032   __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
4033   __ testl(numerator, numerator);
4034   __ cmovl(Condition::kLess, out, tmp);
4035   __ Bind(&done);
4036 }
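// Worked example for RemByPowerOfTwo (illustrative), with imm == 4 (mask 3):
//   n = -7: out = -7 & 3 = 1 (non-zero), tmp = 1 + ~3 = 1 - 4 = -3,
//           n < 0 so the cmov picks -3, matching Java's -7 % 4 == -3.
//   n =  7: out = 3, n >= 0 so the cmov is not taken and out stays 3.
//   n = -8: out = 0 and we jump straight to done; without that early exit the
//           bias step would wrongly produce -4 instead of 0.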
4037 
4038 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
4039   LocationSummary* locations = instruction->GetLocations();
4040 
4041   Register out_register = locations->Out().AsRegister<Register>();
4042   Register input_register = locations->InAt(0).AsRegister<Register>();
4043   int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
4044   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4045   uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
4046 
4047   Register num = locations->GetTemp(0).AsRegister<Register>();
4048 
4049   __ leal(num, Address(input_register, abs_imm - 1));
4050   __ testl(input_register, input_register);
4051   __ cmovl(kGreaterEqual, num, input_register);
4052   int shift = CTZ(imm);
4053   __ sarl(num, Immediate(shift));
4054 
4055   if (imm < 0) {
4056     __ negl(num);
4057   }
4058 
4059   __ movl(out_register, num);
4060 }
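// Worked example for DivByPowerOfTwo (illustrative), with imm == 4 (shift 2):
//   n = -7: num = -7 + 3 = -4, n < 0 so the biased value is kept,
//           -4 >> 2 = -1, matching Java's -7 / 4 == -1 (round toward zero).
//   n =  7: the cmov picks the unbiased 7, and 7 >> 2 = 1.
// For imm == -4 the same sequence runs with abs_imm == 4 and the final negl
// flips the sign, e.g. -7 / -4 == 1.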
4061 
4062 void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4063   DCHECK(instruction->IsDiv() || instruction->IsRem());
4064 
4065   LocationSummary* locations = instruction->GetLocations();
4066   int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
4067 
4068   Register eax = locations->InAt(0).AsRegister<Register>();
4069   Register out = locations->Out().AsRegister<Register>();
4070   Register num;
4071   Register edx;
4072 
4073   if (instruction->IsDiv()) {
4074     edx = locations->GetTemp(0).AsRegister<Register>();
4075     num = locations->GetTemp(1).AsRegister<Register>();
4076   } else {
4077     edx = locations->Out().AsRegister<Register>();
4078     num = locations->GetTemp(0).AsRegister<Register>();
4079   }
4080 
4081   DCHECK_EQ(EAX, eax);
4082   DCHECK_EQ(EDX, edx);
4083   if (instruction->IsDiv()) {
4084     DCHECK_EQ(EAX, out);
4085   } else {
4086     DCHECK_EQ(EDX, out);
4087   }
4088 
4089   int64_t magic;
4090   int shift;
4091   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
4092 
4093   // Save the numerator.
4094   __ movl(num, eax);
4095 
4096   // EAX = magic
4097   __ movl(eax, Immediate(magic));
4098 
4099   // EDX:EAX = magic * numerator
4100   __ imull(num);
4101 
4102   if (imm > 0 && magic < 0) {
4103     // EDX += num
4104     __ addl(edx, num);
4105   } else if (imm < 0 && magic > 0) {
4106     __ subl(edx, num);
4107   }
4108 
4109   // Shift if needed.
4110   if (shift != 0) {
4111     __ sarl(edx, Immediate(shift));
4112   }
4113 
4114   // EDX += 1 if EDX < 0
4115   __ movl(eax, edx);
4116   __ shrl(edx, Immediate(31));
4117   __ addl(edx, eax);
4118 
4119   if (instruction->IsRem()) {
4120     __ movl(eax, num);
4121     __ imull(edx, Immediate(imm));
4122     __ subl(eax, edx);
4123     __ movl(edx, eax);
4124   } else {
4125     __ movl(eax, edx);
4126   }
4127 }
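// Sketch of the constant-division algorithm above (Granlund/Montgomery style,
// comments only, illustrative):
//   q  = High32(int64_t{magic} * n);   // imull num
//   if (imm > 0 && magic < 0) q += n;  // addl
//   if (imm < 0 && magic > 0) q -= n;  // subl
//   q >>= shift;                       // sarl (arithmetic)
//   q += uint32_t{q} >> 31;            // add 1 when negative -> round toward zero
//   r  = n - q * imm;                  // remainder, only for HRem
// For example, for imm == 7 the usual Hacker's Delight constants are
// magic == 0x92492493 and shift == 2, giving q == 3 for n == 21 and
// q == -3 for n == -21.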
4128 
4129 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
4130   DCHECK(instruction->IsDiv() || instruction->IsRem());
4131 
4132   LocationSummary* locations = instruction->GetLocations();
4133   Location out = locations->Out();
4134   Location first = locations->InAt(0);
4135   Location second = locations->InAt(1);
4136   bool is_div = instruction->IsDiv();
4137 
4138   switch (instruction->GetResultType()) {
4139     case DataType::Type::kInt32: {
4140       DCHECK_EQ(EAX, first.AsRegister<Register>());
4141       DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
4142 
4143       if (second.IsConstant()) {
4144         int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
4145 
4146         if (imm == 0) {
4147           // Do not generate anything for 0; the preceding HDivZeroCheck guarantees this
4148           // case is never reached.
4148         } else if (imm == 1 || imm == -1) {
4149           DivRemOneOrMinusOne(instruction);
4150         } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4151           if (is_div) {
4152             DivByPowerOfTwo(instruction->AsDiv());
4153           } else {
4154             RemByPowerOfTwo(instruction->AsRem());
4155           }
4156         } else {
4157           DCHECK(imm <= -2 || imm >= 2);
4158           GenerateDivRemWithAnyConstant(instruction);
4159         }
4160       } else {
4161         SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86(
4162             instruction, out.AsRegister<Register>(), is_div);
4163         codegen_->AddSlowPath(slow_path);
4164 
4165         Register second_reg = second.AsRegister<Register>();
4166         // 0x80000000 / -1 triggers an arithmetic exception!
4167         // Dividing by -1 is just negation, and in two's complement -0x80000000 == 0x80000000,
4168         // so it's safe to just use negl instead of more complex comparisons.
4169 
4170         __ cmpl(second_reg, Immediate(-1));
4171         __ j(kEqual, slow_path->GetEntryLabel());
4172 
4173         // edx:eax <- sign-extended of eax
4174         __ cdq();
4175         // eax = quotient, edx = remainder
4176         __ idivl(second_reg);
4177         __ Bind(slow_path->GetExitLabel());
4178       }
4179       break;
4180     }
4181 
4182     case DataType::Type::kInt64: {
4183       InvokeRuntimeCallingConvention calling_convention;
4184       DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
4185       DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
4186       DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
4187       DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
4188       DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
4189       DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
4190 
4191       if (is_div) {
4192         codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
4193         CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
4194       } else {
4195         codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
4196         CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
4197       }
4198       break;
4199     }
4200 
4201     default:
4202       LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
4203   }
4204 }
4205 
4206 void LocationsBuilderX86::VisitDiv(HDiv* div) {
4207   LocationSummary::CallKind call_kind = (div->GetResultType() == DataType::Type::kInt64)
4208       ? LocationSummary::kCallOnMainOnly
4209       : LocationSummary::kNoCall;
4210   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
4211 
4212   switch (div->GetResultType()) {
4213     case DataType::Type::kInt32: {
4214       locations->SetInAt(0, Location::RegisterLocation(EAX));
4215       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4216       locations->SetOut(Location::SameAsFirstInput());
4217       // Intel uses edx:eax as the dividend.
4218       locations->AddTemp(Location::RegisterLocation(EDX));
4219       // We need to save the numerator while we tweak EAX and EDX. Since imul forces its
4220       // results into EAX and EDX, things are simpler if we also use EAX as the output and
4221       // request another temp.
4222       if (div->InputAt(1)->IsIntConstant()) {
4223         locations->AddTemp(Location::RequiresRegister());
4224       }
4225       break;
4226     }
4227     case DataType::Type::kInt64: {
4228       InvokeRuntimeCallingConvention calling_convention;
4229       locations->SetInAt(0, Location::RegisterPairLocation(
4230           calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4231       locations->SetInAt(1, Location::RegisterPairLocation(
4232           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4233       // Runtime helper puts the result in EAX, EDX.
4234       locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
4235       break;
4236     }
4237     case DataType::Type::kFloat32:
4238     case DataType::Type::kFloat64: {
4239       locations->SetInAt(0, Location::RequiresFpuRegister());
4240       if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4241         DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
4242       } else if (div->InputAt(1)->IsConstant()) {
4243         locations->SetInAt(1, Location::RequiresFpuRegister());
4244       } else {
4245         locations->SetInAt(1, Location::Any());
4246       }
4247       locations->SetOut(Location::SameAsFirstInput());
4248       break;
4249     }
4250 
4251     default:
4252       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4253   }
4254 }
4255 
4256 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
4257   LocationSummary* locations = div->GetLocations();
4258   Location first = locations->InAt(0);
4259   Location second = locations->InAt(1);
4260 
4261   switch (div->GetResultType()) {
4262     case DataType::Type::kInt32:
4263     case DataType::Type::kInt64: {
4264       GenerateDivRemIntegral(div);
4265       break;
4266     }
4267 
4268     case DataType::Type::kFloat32: {
4269       if (second.IsFpuRegister()) {
4270         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4271       } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4272         HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
4273         DCHECK(const_area->IsEmittedAtUseSite());
4274         __ divss(first.AsFpuRegister<XmmRegister>(),
4275                  codegen_->LiteralFloatAddress(
4276                    const_area->GetConstant()->AsFloatConstant()->GetValue(),
4277                    const_area->GetBaseMethodAddress(),
4278                    const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4279       } else {
4280         DCHECK(second.IsStackSlot());
4281         __ divss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4282       }
4283       break;
4284     }
4285 
4286     case DataType::Type::kFloat64: {
4287       if (second.IsFpuRegister()) {
4288         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4289       } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
4290         HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
4291         DCHECK(const_area->IsEmittedAtUseSite());
4292         __ divsd(first.AsFpuRegister<XmmRegister>(),
4293                  codegen_->LiteralDoubleAddress(
4294                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
4295                      const_area->GetBaseMethodAddress(),
4296                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
4297       } else {
4298         DCHECK(second.IsDoubleStackSlot());
4299         __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
4300       }
4301       break;
4302     }
4303 
4304     default:
4305       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4306   }
4307 }
4308 
4309 void LocationsBuilderX86::VisitRem(HRem* rem) {
4310   DataType::Type type = rem->GetResultType();
4311 
4312   LocationSummary::CallKind call_kind = (rem->GetResultType() == DataType::Type::kInt64)
4313       ? LocationSummary::kCallOnMainOnly
4314       : LocationSummary::kNoCall;
4315   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
4316 
4317   switch (type) {
4318     case DataType::Type::kInt32: {
4319       locations->SetInAt(0, Location::RegisterLocation(EAX));
4320       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4321       locations->SetOut(Location::RegisterLocation(EDX));
4322       // We need to save the numerator while we tweak EAX and EDX. Since imul forces its
4323       // results into EAX and EDX, things are simpler if we also use EDX as the output and
4324       // request another temp.
4325       if (rem->InputAt(1)->IsIntConstant()) {
4326         locations->AddTemp(Location::RequiresRegister());
4327       }
4328       break;
4329     }
4330     case DataType::Type::kInt64: {
4331       InvokeRuntimeCallingConvention calling_convention;
4332       locations->SetInAt(0, Location::RegisterPairLocation(
4333           calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
4334       locations->SetInAt(1, Location::RegisterPairLocation(
4335           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
4336       // Runtime helper puts the result in EAX, EDX.
4337       locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
4338       break;
4339     }
4340     case DataType::Type::kFloat64:
4341     case DataType::Type::kFloat32: {
4342       locations->SetInAt(0, Location::Any());
4343       locations->SetInAt(1, Location::Any());
4344       locations->SetOut(Location::RequiresFpuRegister());
4345       locations->AddTemp(Location::RegisterLocation(EAX));
4346       break;
4347     }
4348 
4349     default:
4350       LOG(FATAL) << "Unexpected rem type " << type;
4351   }
4352 }
4353 
4354 void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
4355   DataType::Type type = rem->GetResultType();
4356   switch (type) {
4357     case DataType::Type::kInt32:
4358     case DataType::Type::kInt64: {
4359       GenerateDivRemIntegral(rem);
4360       break;
4361     }
4362     case DataType::Type::kFloat32:
4363     case DataType::Type::kFloat64: {
4364       GenerateRemFP(rem);
4365       break;
4366     }
4367     default:
4368       LOG(FATAL) << "Unexpected rem type " << type;
4369   }
4370 }
4371 
4372 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4373   LocationSummary* locations = new (allocator) LocationSummary(minmax);
4374   switch (minmax->GetResultType()) {
4375     case DataType::Type::kInt32:
4376       locations->SetInAt(0, Location::RequiresRegister());
4377       locations->SetInAt(1, Location::RequiresRegister());
4378       locations->SetOut(Location::SameAsFirstInput());
4379       break;
4380     case DataType::Type::kInt64:
4381       locations->SetInAt(0, Location::RequiresRegister());
4382       locations->SetInAt(1, Location::RequiresRegister());
4383       locations->SetOut(Location::SameAsFirstInput());
4384       // Register to use to perform a long subtract to set cc.
4385       locations->AddTemp(Location::RequiresRegister());
4386       break;
4387     case DataType::Type::kFloat32:
4388       locations->SetInAt(0, Location::RequiresFpuRegister());
4389       locations->SetInAt(1, Location::RequiresFpuRegister());
4390       locations->SetOut(Location::SameAsFirstInput());
4391       locations->AddTemp(Location::RequiresRegister());
4392       break;
4393     case DataType::Type::kFloat64:
4394       locations->SetInAt(0, Location::RequiresFpuRegister());
4395       locations->SetInAt(1, Location::RequiresFpuRegister());
4396       locations->SetOut(Location::SameAsFirstInput());
4397       break;
4398     default:
4399       LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4400   }
4401 }
4402 
4403 void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations,
4404                                                     bool is_min,
4405                                                     DataType::Type type) {
4406   Location op1_loc = locations->InAt(0);
4407   Location op2_loc = locations->InAt(1);
4408 
4409   // Shortcut for same input locations.
4410   if (op1_loc.Equals(op2_loc)) {
4411     // Can return immediately, as op1_loc == out_loc.
4412     // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4413     //       a copy here.
4414     DCHECK(locations->Out().Equals(op1_loc));
4415     return;
4416   }
4417 
4418   if (type == DataType::Type::kInt64) {
4419     // Need to perform a subtract to get the sign right.
4420     // op1 is already in the same location as the output.
4421     Location output = locations->Out();
4422     Register output_lo = output.AsRegisterPairLow<Register>();
4423     Register output_hi = output.AsRegisterPairHigh<Register>();
4424 
4425     Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
4426     Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
4427 
4428     // The comparison is performed by subtracting the second operand from
4429     // the first operand and then setting the status flags in the same
4430     // manner as the SUB instruction.
4431     __ cmpl(output_lo, op2_lo);
4432 
4433     // Now use a temp and the borrow to finish the subtraction of op2_hi.
4434     Register temp = locations->GetTemp(0).AsRegister<Register>();
4435     __ movl(temp, output_hi);
4436     __ sbbl(temp, op2_hi);
4437 
4438     // Now the condition code is correct.
4439     Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
4440     __ cmovl(cond, output_lo, op2_lo);
4441     __ cmovl(cond, output_hi, op2_hi);
4442   } else {
4443     DCHECK_EQ(type, DataType::Type::kInt32);
4444     Register out = locations->Out().AsRegister<Register>();
4445     Register op2 = op2_loc.AsRegister<Register>();
4446 
4447     //  (out := op1)
4448     //  out <=? op2
4449     //  if out is min jmp done
4450     //  out := op2
4451     // done:
4452 
4453     __ cmpl(out, op2);
4454     Condition cond = is_min ? Condition::kGreater : Condition::kLess;
4455     __ cmovl(cond, out, op2);
4456   }
4457 }
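// Note on the 64-bit path above (illustrative): only the flags of the 64-bit
// subtraction out - op2 are needed, not its value, so the low words are
// compared with cmpl and the borrow is propagated into a scratch copy of the
// high word with sbbl. After that, SF/OF reflect the signed 64-bit comparison,
// so a single kLess / kGreaterEqual cmov pair can select op2 without having
// clobbered the output before the decision is made.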
4458 
4459 void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations,
4460                                                    bool is_min,
4461                                                    DataType::Type type) {
4462   Location op1_loc = locations->InAt(0);
4463   Location op2_loc = locations->InAt(1);
4464   Location out_loc = locations->Out();
4465   XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4466 
4467   // Shortcut for same input locations.
4468   if (op1_loc.Equals(op2_loc)) {
4469     DCHECK(out_loc.Equals(op1_loc));
4470     return;
4471   }
4472 
4473   //  (out := op1)
4474   //  out <=? op2
4475   //  if Nan jmp Nan_label
4476   //  if out is min jmp done
4477   //  if op2 is min jmp op2_label
4478   //  handle -0/+0
4479   //  jmp done
4480   // Nan_label:
4481   //  out := NaN
4482   // op2_label:
4483   //  out := op2
4484   // done:
4485   //
4486   // This removes one jmp, but needs to copy one input (op1) to out.
4487   //
4488   // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
4489 
4490   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4491 
4492   NearLabel nan, done, op2_label;
4493   if (type == DataType::Type::kFloat64) {
4494     __ ucomisd(out, op2);
4495   } else {
4496     DCHECK_EQ(type, DataType::Type::kFloat32);
4497     __ ucomiss(out, op2);
4498   }
4499 
4500   __ j(Condition::kParityEven, &nan);
4501 
4502   __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4503   __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4504 
4505   // Handle 0.0/-0.0.
4506   if (is_min) {
4507     if (type == DataType::Type::kFloat64) {
4508       __ orpd(out, op2);
4509     } else {
4510       __ orps(out, op2);
4511     }
4512   } else {
4513     if (type == DataType::Type::kFloat64) {
4514       __ andpd(out, op2);
4515     } else {
4516       __ andps(out, op2);
4517     }
4518   }
4519   __ jmp(&done);
4520 
4521   // NaN handling.
4522   __ Bind(&nan);
4523   if (type == DataType::Type::kFloat64) {
4524     // TODO: Use a constant from the constant table (requires extra input).
4525     __ LoadLongConstant(out, kDoubleNaN);
4526   } else {
4527     Register constant = locations->GetTemp(0).AsRegister<Register>();
4528     __ movl(constant, Immediate(kFloatNaN));
4529     __ movd(out, constant);
4530   }
4531   __ jmp(&done);
4532 
4533   // out := op2;
4534   __ Bind(&op2_label);
4535   if (type == DataType::Type::kFloat64) {
4536     __ movsd(out, op2);
4537   } else {
4538     __ movss(out, op2);
4539   }
4540 
4541   // Done.
4542   __ Bind(&done);
4543 }
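// Note on the 0.0 / -0.0 handling above (illustrative): ucomiss/ucomisd report
// +0.0 and -0.0 as equal, so neither the kAbove nor the kBelow branch is taken
// and control falls through to the or/and block. OR-ing the bit patterns
// (0x00000000 | 0x80000000) yields -0.0, which is what Math.min must return,
// while AND-ing them yields +0.0 for Math.max. NaN is detected first via the
// parity flag, because an unordered compare sets PF.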
4544 
4545 void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4546   DataType::Type type = minmax->GetResultType();
4547   switch (type) {
4548     case DataType::Type::kInt32:
4549     case DataType::Type::kInt64:
4550       GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4551       break;
4552     case DataType::Type::kFloat32:
4553     case DataType::Type::kFloat64:
4554       GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4555       break;
4556     default:
4557       LOG(FATAL) << "Unexpected type for HMinMax " << type;
4558   }
4559 }
4560 
4561 void LocationsBuilderX86::VisitMin(HMin* min) {
4562   CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4563 }
4564 
4565 void InstructionCodeGeneratorX86::VisitMin(HMin* min) {
4566   GenerateMinMax(min, /*is_min*/ true);
4567 }
4568 
4569 void LocationsBuilderX86::VisitMax(HMax* max) {
4570   CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4571 }
4572 
4573 void InstructionCodeGeneratorX86::VisitMax(HMax* max) {
4574   GenerateMinMax(max, /*is_min*/ false);
4575 }
4576 
4577 void LocationsBuilderX86::VisitAbs(HAbs* abs) {
4578   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4579   switch (abs->GetResultType()) {
4580     case DataType::Type::kInt32:
4581       locations->SetInAt(0, Location::RegisterLocation(EAX));
4582       locations->SetOut(Location::SameAsFirstInput());
4583       locations->AddTemp(Location::RegisterLocation(EDX));
4584       break;
4585     case DataType::Type::kInt64:
4586       locations->SetInAt(0, Location::RequiresRegister());
4587       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4588       locations->AddTemp(Location::RequiresRegister());
4589       break;
4590     case DataType::Type::kFloat32:
4591       locations->SetInAt(0, Location::RequiresFpuRegister());
4592       locations->SetOut(Location::SameAsFirstInput());
4593       locations->AddTemp(Location::RequiresFpuRegister());
4594       locations->AddTemp(Location::RequiresRegister());
4595       break;
4596     case DataType::Type::kFloat64:
4597       locations->SetInAt(0, Location::RequiresFpuRegister());
4598       locations->SetOut(Location::SameAsFirstInput());
4599       locations->AddTemp(Location::RequiresFpuRegister());
4600       break;
4601     default:
4602       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4603   }
4604 }
4605 
4606 void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) {
4607   LocationSummary* locations = abs->GetLocations();
4608   switch (abs->GetResultType()) {
4609     case DataType::Type::kInt32: {
4610       Register out = locations->Out().AsRegister<Register>();
4611       DCHECK_EQ(out, EAX);
4612       Register temp = locations->GetTemp(0).AsRegister<Register>();
4613       DCHECK_EQ(temp, EDX);
4614       // Sign extend EAX into EDX.
4615       __ cdq();
4616       // XOR EAX with sign.
4617       __ xorl(EAX, EDX);
4618       // Subtract out sign to correct.
4619       __ subl(EAX, EDX);
4620       // The result is in EAX.
4621       break;
4622     }
4623     case DataType::Type::kInt64: {
4624       Location input = locations->InAt(0);
4625       Register input_lo = input.AsRegisterPairLow<Register>();
4626       Register input_hi = input.AsRegisterPairHigh<Register>();
4627       Location output = locations->Out();
4628       Register output_lo = output.AsRegisterPairLow<Register>();
4629       Register output_hi = output.AsRegisterPairHigh<Register>();
4630       Register temp = locations->GetTemp(0).AsRegister<Register>();
4631       // Compute the sign into the temporary.
4632       __ movl(temp, input_hi);
4633       __ sarl(temp, Immediate(31));
4634       // Store the sign into the output.
4635       __ movl(output_lo, temp);
4636       __ movl(output_hi, temp);
4637       // XOR the input to the output.
4638       __ xorl(output_lo, input_lo);
4639       __ xorl(output_hi, input_hi);
4640       // Subtract the sign.
4641       __ subl(output_lo, temp);
4642       __ sbbl(output_hi, temp);
4643       break;
4644     }
4645     case DataType::Type::kFloat32: {
4646       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4647       XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4648       Register constant = locations->GetTemp(1).AsRegister<Register>();
4649       __ movl(constant, Immediate(INT32_C(0x7FFFFFFF)));
4650       __ movd(temp, constant);
4651       __ andps(out, temp);
4652       break;
4653     }
4654     case DataType::Type::kFloat64: {
4655       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4656       XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4657       // TODO: Use a constant from the constant table (requires extra input).
4658       __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF));
4659       __ andpd(out, temp);
4660       break;
4661     }
4662     default:
4663       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4664   }
4665 }
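// Worked example for the branch-free integer abs above (illustrative): with
// sign = x >> 31 (all ones when x is negative), the sequence computes
// (x ^ sign) - sign.
//   x = -5: sign = 0xFFFFFFFF, x ^ sign = 4, 4 - (-1) = 5.
//   x =  5: sign = 0, so the xor and sub are no-ops.
//   x = Integer.MIN_VALUE: the result wraps back to MIN_VALUE, which is the
//   behaviour Java's Math.abs specifies for that input.
// The 64-bit case applies the same idea to the register pair, using sbbl to
// propagate the borrow into the high word.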
4666 
4667 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4668   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4669   switch (instruction->GetType()) {
4670     case DataType::Type::kBool:
4671     case DataType::Type::kUint8:
4672     case DataType::Type::kInt8:
4673     case DataType::Type::kUint16:
4674     case DataType::Type::kInt16:
4675     case DataType::Type::kInt32: {
4676       locations->SetInAt(0, Location::Any());
4677       break;
4678     }
4679     case DataType::Type::kInt64: {
4680       locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4681       if (!instruction->IsConstant()) {
4682         locations->AddTemp(Location::RequiresRegister());
4683       }
4684       break;
4685     }
4686     default:
4687       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4688   }
4689 }
4690 
4691 void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4692   SlowPathCode* slow_path =
4693       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86(instruction);
4694   codegen_->AddSlowPath(slow_path);
4695 
4696   LocationSummary* locations = instruction->GetLocations();
4697   Location value = locations->InAt(0);
4698 
4699   switch (instruction->GetType()) {
4700     case DataType::Type::kBool:
4701     case DataType::Type::kUint8:
4702     case DataType::Type::kInt8:
4703     case DataType::Type::kUint16:
4704     case DataType::Type::kInt16:
4705     case DataType::Type::kInt32: {
4706       if (value.IsRegister()) {
4707         __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
4708         __ j(kEqual, slow_path->GetEntryLabel());
4709       } else if (value.IsStackSlot()) {
4710         __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
4711         __ j(kEqual, slow_path->GetEntryLabel());
4712       } else {
4713         DCHECK(value.IsConstant()) << value;
4714         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4715           __ jmp(slow_path->GetEntryLabel());
4716         }
4717       }
4718       break;
4719     }
4720     case DataType::Type::kInt64: {
4721       if (value.IsRegisterPair()) {
4722         Register temp = locations->GetTemp(0).AsRegister<Register>();
4723         __ movl(temp, value.AsRegisterPairLow<Register>());
4724         __ orl(temp, value.AsRegisterPairHigh<Register>());
4725         __ j(kEqual, slow_path->GetEntryLabel());
4726       } else {
4727         DCHECK(value.IsConstant()) << value;
4728         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4729           __ jmp(slow_path->GetEntryLabel());
4730         }
4731       }
4732       break;
4733     }
4734     default:
4735       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4736   }
4737 }
4738 
4739 void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
4740   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4741 
4742   LocationSummary* locations =
4743       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4744 
4745   switch (op->GetResultType()) {
4746     case DataType::Type::kInt32:
4747     case DataType::Type::kInt64: {
4748       // Can't have Location::Any() and output SameAsFirstInput()
4749       locations->SetInAt(0, Location::RequiresRegister());
4750       // The shift count needs to be in CL or a constant.
4751       locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
4752       locations->SetOut(Location::SameAsFirstInput());
4753       break;
4754     }
4755     default:
4756       LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4757   }
4758 }
4759 
4760 void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
4761   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4762 
4763   LocationSummary* locations = op->GetLocations();
4764   Location first = locations->InAt(0);
4765   Location second = locations->InAt(1);
4766   DCHECK(first.Equals(locations->Out()));
4767 
4768   switch (op->GetResultType()) {
4769     case DataType::Type::kInt32: {
4770       DCHECK(first.IsRegister());
4771       Register first_reg = first.AsRegister<Register>();
4772       if (second.IsRegister()) {
4773         Register second_reg = second.AsRegister<Register>();
4774         DCHECK_EQ(ECX, second_reg);
4775         if (op->IsShl()) {
4776           __ shll(first_reg, second_reg);
4777         } else if (op->IsShr()) {
4778           __ sarl(first_reg, second_reg);
4779         } else {
4780           __ shrl(first_reg, second_reg);
4781         }
4782       } else {
4783         int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance;
4784         if (shift == 0) {
4785           return;
4786         }
4787         Immediate imm(shift);
4788         if (op->IsShl()) {
4789           __ shll(first_reg, imm);
4790         } else if (op->IsShr()) {
4791           __ sarl(first_reg, imm);
4792         } else {
4793           __ shrl(first_reg, imm);
4794         }
4795       }
4796       break;
4797     }
4798     case DataType::Type::kInt64: {
4799       if (second.IsRegister()) {
4800         Register second_reg = second.AsRegister<Register>();
4801         DCHECK_EQ(ECX, second_reg);
4802         if (op->IsShl()) {
4803           GenerateShlLong(first, second_reg);
4804         } else if (op->IsShr()) {
4805           GenerateShrLong(first, second_reg);
4806         } else {
4807           GenerateUShrLong(first, second_reg);
4808         }
4809       } else {
4810         // Shift by a constant.
4811         int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4812         // Nothing to do if the shift is 0, as the input is already the output.
4813         if (shift != 0) {
4814           if (op->IsShl()) {
4815             GenerateShlLong(first, shift);
4816           } else if (op->IsShr()) {
4817             GenerateShrLong(first, shift);
4818           } else {
4819             GenerateUShrLong(first, shift);
4820           }
4821         }
4822       }
4823       break;
4824     }
4825     default:
4826       LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4827   }
4828 }
4829 
4830 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
4831   Register low = loc.AsRegisterPairLow<Register>();
4832   Register high = loc.AsRegisterPairHigh<Register>();
4833   if (shift == 1) {
4834     // This is just an addition.
4835     __ addl(low, low);
4836     __ adcl(high, high);
4837   } else if (shift == 32) {
4838     // Shift by 32 is easy. High gets low, and low gets 0.
4839     codegen_->EmitParallelMoves(
4840         loc.ToLow(),
4841         loc.ToHigh(),
4842         DataType::Type::kInt32,
4843         Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4844         loc.ToLow(),
4845         DataType::Type::kInt32);
4846   } else if (shift > 32) {
4847     // Low part becomes 0.  High part is low part << (shift-32).
4848     __ movl(high, low);
4849     __ shll(high, Immediate(shift - 32));
4850     __ xorl(low, low);
4851   } else {
4852     // Between 1 and 31.
4853     __ shld(high, low, Immediate(shift));
4854     __ shll(low, Immediate(shift));
4855   }
4856 }
4857 
4858 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
4859   NearLabel done;
4860   __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
4861   __ shll(loc.AsRegisterPairLow<Register>(), shifter);
4862   __ testl(shifter, Immediate(32));
4863   __ j(kEqual, &done);
4864   __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
4865   __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
4866   __ Bind(&done);
4867 }
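// Note on the register-count long shifts (illustrative): shld/shll/shrd/sarl
// only use the low five bits of CL, i.e. the hardware shifts by count % 32.
// For counts 32..63 the sequence above therefore first produces the result for
// (count & 31), and the testl(shifter, 32) fix-up then moves the words and
// fills with zeros (shl/ushr) or the sign (shr). For example, shifting
// 0x0000000000000001 left by 33 first yields lo = 2, hi = 0, and the fix-up
// turns that into hi = 2, lo = 0, i.e. 1 << 33.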
4868 
4869 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
4870   Register low = loc.AsRegisterPairLow<Register>();
4871   Register high = loc.AsRegisterPairHigh<Register>();
4872   if (shift == 32) {
4873     // Need to copy the sign.
4874     DCHECK_NE(low, high);
4875     __ movl(low, high);
4876     __ sarl(high, Immediate(31));
4877   } else if (shift > 32) {
4878     DCHECK_NE(low, high);
4879     // High part becomes sign. Low part is shifted by shift - 32.
4880     __ movl(low, high);
4881     __ sarl(high, Immediate(31));
4882     __ sarl(low, Immediate(shift - 32));
4883   } else {
4884     // Between 1 and 31.
4885     __ shrd(low, high, Immediate(shift));
4886     __ sarl(high, Immediate(shift));
4887   }
4888 }
4889 
4890 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
4891   NearLabel done;
4892   __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4893   __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
4894   __ testl(shifter, Immediate(32));
4895   __ j(kEqual, &done);
4896   __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4897   __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
4898   __ Bind(&done);
4899 }
4900 
4901 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
4902   Register low = loc.AsRegisterPairLow<Register>();
4903   Register high = loc.AsRegisterPairHigh<Register>();
4904   if (shift == 32) {
4905     // Shift by 32 is easy. Low gets high, and high gets 0.
4906     codegen_->EmitParallelMoves(
4907         loc.ToHigh(),
4908         loc.ToLow(),
4909         DataType::Type::kInt32,
4910         Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4911         loc.ToHigh(),
4912         DataType::Type::kInt32);
4913   } else if (shift > 32) {
4914     // Low part is high >> (shift - 32). High part becomes 0.
4915     __ movl(low, high);
4916     __ shrl(low, Immediate(shift - 32));
4917     __ xorl(high, high);
4918   } else {
4919     // Between 1 and 31.
4920     __ shrd(low, high, Immediate(shift));
4921     __ shrl(high, Immediate(shift));
4922   }
4923 }
4924 
4925 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
4926   NearLabel done;
4927   __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4928   __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
4929   __ testl(shifter, Immediate(32));
4930   __ j(kEqual, &done);
4931   __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4932   __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
4933   __ Bind(&done);
4934 }
4935 
4936 void LocationsBuilderX86::VisitRor(HRor* ror) {
4937   LocationSummary* locations =
4938       new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4939 
4940   switch (ror->GetResultType()) {
4941     case DataType::Type::kInt64:
4942       // Add the temporary needed.
4943       locations->AddTemp(Location::RequiresRegister());
4944       FALLTHROUGH_INTENDED;
4945     case DataType::Type::kInt32:
4946       locations->SetInAt(0, Location::RequiresRegister());
4947       // The shift count needs to be in CL (unless it is a constant).
4948       locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
4949       locations->SetOut(Location::SameAsFirstInput());
4950       break;
4951     default:
4952       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4953       UNREACHABLE();
4954   }
4955 }
4956 
4957 void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
4958   LocationSummary* locations = ror->GetLocations();
4959   Location first = locations->InAt(0);
4960   Location second = locations->InAt(1);
4961 
4962   if (ror->GetResultType() == DataType::Type::kInt32) {
4963     Register first_reg = first.AsRegister<Register>();
4964     if (second.IsRegister()) {
4965       Register second_reg = second.AsRegister<Register>();
4966       __ rorl(first_reg, second_reg);
4967     } else {
4968       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4969       __ rorl(first_reg, imm);
4970     }
4971     return;
4972   }
4973 
4974   DCHECK_EQ(ror->GetResultType(), DataType::Type::kInt64);
4975   Register first_reg_lo = first.AsRegisterPairLow<Register>();
4976   Register first_reg_hi = first.AsRegisterPairHigh<Register>();
4977   Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
4978   if (second.IsRegister()) {
4979     Register second_reg = second.AsRegister<Register>();
4980     DCHECK_EQ(second_reg, ECX);
4981     __ movl(temp_reg, first_reg_hi);
4982     __ shrd(first_reg_hi, first_reg_lo, second_reg);
4983     __ shrd(first_reg_lo, temp_reg, second_reg);
4984     __ movl(temp_reg, first_reg_hi);
4985     __ testl(second_reg, Immediate(32));
4986     __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
4987     __ cmovl(kNotEqual, first_reg_lo, temp_reg);
4988   } else {
4989     int32_t shift_amt = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4990     if (shift_amt == 0) {
4991       // Already fine.
4992       return;
4993     }
4994     if (shift_amt == 32) {
4995       // Just swap.
4996       __ movl(temp_reg, first_reg_lo);
4997       __ movl(first_reg_lo, first_reg_hi);
4998       __ movl(first_reg_hi, temp_reg);
4999       return;
5000     }
5001 
5002     Immediate imm(shift_amt);
5003     // Save the contents of the low value.
5004     __ movl(temp_reg, first_reg_lo);
5005 
5006     // Shift right into low, feeding bits from high.
5007     __ shrd(first_reg_lo, first_reg_hi, imm);
5008 
5009     // Shift right into high, feeding bits from the original low.
5010     __ shrd(first_reg_hi, temp_reg, imm);
5011 
5012     // Swap if needed.
5013     if (shift_amt > 32) {
5014       __ movl(temp_reg, first_reg_lo);
5015       __ movl(first_reg_lo, first_reg_hi);
5016       __ movl(first_reg_hi, temp_reg);
5017     }
5018   }
5019 }
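// Worked example for the constant 64-bit rotate above (illustrative), shift 1:
// for hi:lo = 0x00000000:0x00000001, temp = 1,
//   shrd(lo, hi, 1)   -> lo = (lo >> 1) | (hi << 31) = 0
//   shrd(hi, temp, 1) -> hi = (hi >> 1) | (temp << 31) = 0x80000000
// giving 0x8000000000000000, i.e. the value rotated right by one bit. For
// shift amounts above 32 the shrd counts are taken modulo 32 by the hardware,
// so the trailing swap of the two words completes the rotation.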
5020 
5021 void LocationsBuilderX86::VisitShl(HShl* shl) {
5022   HandleShift(shl);
5023 }
5024 
5025 void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
5026   HandleShift(shl);
5027 }
5028 
5029 void LocationsBuilderX86::VisitShr(HShr* shr) {
5030   HandleShift(shr);
5031 }
5032 
5033 void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
5034   HandleShift(shr);
5035 }
5036 
5037 void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
5038   HandleShift(ushr);
5039 }
5040 
5041 void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
5042   HandleShift(ushr);
5043 }
5044 
5045 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
5046   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5047       instruction, LocationSummary::kCallOnMainOnly);
5048   locations->SetOut(Location::RegisterLocation(EAX));
5049   InvokeRuntimeCallingConvention calling_convention;
5050   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5051 }
5052 
5053 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
5054   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5055   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5056   DCHECK(!codegen_->IsLeafMethod());
5057 }
5058 
5059 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
5060   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5061       instruction, LocationSummary::kCallOnMainOnly);
5062   locations->SetOut(Location::RegisterLocation(EAX));
5063   InvokeRuntimeCallingConvention calling_convention;
5064   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5065   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5066 }
5067 
5068 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
5069   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5070   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5071   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5072   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5073   DCHECK(!codegen_->IsLeafMethod());
5074 }
5075 
5076 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
5077   LocationSummary* locations =
5078       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5079   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5080   if (location.IsStackSlot()) {
5081     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5082   } else if (location.IsDoubleStackSlot()) {
5083     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5084   }
5085   locations->SetOut(location);
5086 }
5087 
5088 void InstructionCodeGeneratorX86::VisitParameterValue(
5089     HParameterValue* instruction ATTRIBUTE_UNUSED) {
5090 }
5091 
5092 void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
5093   LocationSummary* locations =
5094       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5095   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
5096 }
5097 
5098 void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
5099 }
5100 
5101 void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
5102   LocationSummary* locations =
5103       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5104   locations->SetInAt(0, Location::RequiresRegister());
5105   locations->SetOut(Location::RequiresRegister());
5106 }
5107 
5108 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
5109   LocationSummary* locations = instruction->GetLocations();
5110   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
5111     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5112         instruction->GetIndex(), kX86PointerSize).SizeValue();
5113     __ movl(locations->Out().AsRegister<Register>(),
5114             Address(locations->InAt(0).AsRegister<Register>(), method_offset));
5115   } else {
5116     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
5117         instruction->GetIndex(), kX86PointerSize));
5118     __ movl(locations->Out().AsRegister<Register>(),
5119             Address(locations->InAt(0).AsRegister<Register>(),
5120                     mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
5121     // temp = temp->GetImtEntryAt(method_offset);
5122     __ movl(locations->Out().AsRegister<Register>(),
5123             Address(locations->Out().AsRegister<Register>(), method_offset));
5124   }
5125 }
5126 
5127 void LocationsBuilderX86::VisitNot(HNot* not_) {
5128   LocationSummary* locations =
5129       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5130   locations->SetInAt(0, Location::RequiresRegister());
5131   locations->SetOut(Location::SameAsFirstInput());
5132 }
5133 
5134 void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
5135   LocationSummary* locations = not_->GetLocations();
5136   Location in = locations->InAt(0);
5137   Location out = locations->Out();
5138   DCHECK(in.Equals(out));
5139   switch (not_->GetResultType()) {
5140     case DataType::Type::kInt32:
5141       __ notl(out.AsRegister<Register>());
5142       break;
5143 
5144     case DataType::Type::kInt64:
5145       __ notl(out.AsRegisterPairLow<Register>());
5146       __ notl(out.AsRegisterPairHigh<Register>());
5147       break;
5148 
5149     default:
5150       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5151   }
5152 }
5153 
5154 void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) {
5155   LocationSummary* locations =
5156       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5157   locations->SetInAt(0, Location::RequiresRegister());
5158   locations->SetOut(Location::SameAsFirstInput());
5159 }
5160 
5161 void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
5162   LocationSummary* locations = bool_not->GetLocations();
5163   Location in = locations->InAt(0);
5164   Location out = locations->Out();
5165   DCHECK(in.Equals(out));
5166   __ xorl(out.AsRegister<Register>(), Immediate(1));
5167 }
5168 
5169 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
5170   LocationSummary* locations =
5171       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
5172   switch (compare->InputAt(0)->GetType()) {
5173     case DataType::Type::kBool:
5174     case DataType::Type::kUint8:
5175     case DataType::Type::kInt8:
5176     case DataType::Type::kUint16:
5177     case DataType::Type::kInt16:
5178     case DataType::Type::kInt32:
5179     case DataType::Type::kInt64: {
5180       locations->SetInAt(0, Location::RequiresRegister());
5181       locations->SetInAt(1, Location::Any());
5182       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5183       break;
5184     }
5185     case DataType::Type::kFloat32:
5186     case DataType::Type::kFloat64: {
5187       locations->SetInAt(0, Location::RequiresFpuRegister());
5188       if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
5189         DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
5190       } else if (compare->InputAt(1)->IsConstant()) {
5191         locations->SetInAt(1, Location::RequiresFpuRegister());
5192       } else {
5193         locations->SetInAt(1, Location::Any());
5194       }
5195       locations->SetOut(Location::RequiresRegister());
5196       break;
5197     }
5198     default:
5199       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5200   }
5201 }
5202 
5203 void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
5204   LocationSummary* locations = compare->GetLocations();
5205   Register out = locations->Out().AsRegister<Register>();
5206   Location left = locations->InAt(0);
5207   Location right = locations->InAt(1);
5208 
5209   NearLabel less, greater, done;
5210   Condition less_cond = kLess;
5211 
5212   switch (compare->InputAt(0)->GetType()) {
5213     case DataType::Type::kBool:
5214     case DataType::Type::kUint8:
5215     case DataType::Type::kInt8:
5216     case DataType::Type::kUint16:
5217     case DataType::Type::kInt16:
5218     case DataType::Type::kInt32: {
5219       codegen_->GenerateIntCompare(left, right);
5220       break;
5221     }
5222     case DataType::Type::kInt64: {
5223       Register left_low = left.AsRegisterPairLow<Register>();
5224       Register left_high = left.AsRegisterPairHigh<Register>();
5225       int32_t val_low = 0;
5226       int32_t val_high = 0;
5227       bool right_is_const = false;
5228 
5229       if (right.IsConstant()) {
5230         DCHECK(right.GetConstant()->IsLongConstant());
5231         right_is_const = true;
5232         int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
5233         val_low = Low32Bits(val);
5234         val_high = High32Bits(val);
5235       }
5236 
5237       if (right.IsRegisterPair()) {
5238         __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
5239       } else if (right.IsDoubleStackSlot()) {
5240         __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
5241       } else {
5242         DCHECK(right_is_const) << right;
5243         codegen_->Compare32BitValue(left_high, val_high);
5244       }
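      // The signed comparison of the high words above decides the result
      // unless they are equal; in that case fall through and compare the low
      // words as unsigned values (hence less_cond is set to kBelow below).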
5245       __ j(kLess, &less);  // Signed compare.
5246       __ j(kGreater, &greater);  // Signed compare.
5247       if (right.IsRegisterPair()) {
5248         __ cmpl(left_low, right.AsRegisterPairLow<Register>());
5249       } else if (right.IsDoubleStackSlot()) {
5250         __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
5251       } else {
5252         DCHECK(right_is_const) << right;
5253         codegen_->Compare32BitValue(left_low, val_low);
5254       }
5255       less_cond = kBelow;  // for CF (unsigned).
5256       break;
5257     }
5258     case DataType::Type::kFloat32: {
5259       GenerateFPCompare(left, right, compare, false);
5260       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
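      // If either input is NaN the comparison is unordered; gt-bias compares
      // treat NaN as greater, lt-bias compares treat it as less.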
5261       less_cond = kBelow;  // for CF (floats).
5262       break;
5263     }
5264     case DataType::Type::kFloat64: {
5265       GenerateFPCompare(left, right, compare, true);
5266       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
5267       less_cond = kBelow;  // for CF (floats).
5268       break;
5269     }
5270     default:
5271       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
5272   }
5273 
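  // Materialize the three-way result from the flags of the last comparison:
  // equal -> 0, less_cond -> -1, otherwise fall through to 1. The movl below
  // does not modify the flags, so `out` can be preloaded with 0 first.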
5274   __ movl(out, Immediate(0));
5275   __ j(kEqual, &done);
5276   __ j(less_cond, &less);
5277 
5278   __ Bind(&greater);
5279   __ movl(out, Immediate(1));
5280   __ jmp(&done);
5281 
5282   __ Bind(&less);
5283   __ movl(out, Immediate(-1));
5284 
5285   __ Bind(&done);
5286 }
5287 
5288 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
5289   LocationSummary* locations =
5290       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5291   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5292     locations->SetInAt(i, Location::Any());
5293   }
5294   locations->SetOut(Location::Any());
5295 }
5296 
5297 void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
5298   LOG(FATAL) << "Unreachable";
5299 }
5300 
5301 void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
5302   /*
5303    * According to the JSR-133 Cookbook, on x86 only StoreLoad/AnyAny barriers need a memory fence.
5304    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
5305    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
5306    */
5307   switch (kind) {
5308     case MemBarrierKind::kAnyAny: {
5309       MemoryFence();
5310       break;
5311     }
5312     case MemBarrierKind::kAnyStore:
5313     case MemBarrierKind::kLoadAny:
5314     case MemBarrierKind::kStoreStore: {
5315       // nop
5316       break;
5317     }
5318     case MemBarrierKind::kNTStoreStore:
5319       // Non-Temporal Store/Store needs an explicit fence.
5320       MemoryFence(/* non-temporal= */ true);
5321       break;
5322   }
5323 }
5324 
5325 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
5326       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
5327       ArtMethod* method ATTRIBUTE_UNUSED) {
5328   return desired_dispatch_info;
5329 }
5330 
5331 Register CodeGeneratorX86::GetInvokeExtraParameter(HInvoke* invoke, Register temp) {
5332   if (invoke->IsInvokeStaticOrDirect()) {
5333     return GetInvokeStaticOrDirectExtraParameter(invoke->AsInvokeStaticOrDirect(), temp);
5334   }
5335   DCHECK(invoke->IsInvokeInterface());
5336   Location location =
5337       invoke->GetLocations()->InAt(invoke->AsInvokeInterface()->GetSpecialInputIndex());
5338   return location.AsRegister<Register>();
5339 }
5340 
5341 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
5342                                                                  Register temp) {
5343   Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
5344   if (!invoke->GetLocations()->Intrinsified()) {
5345     return location.AsRegister<Register>();
5346   }
5347   // For intrinsics we allow any location, so it may be on the stack.
5348   if (!location.IsRegister()) {
5349     __ movl(temp, Address(ESP, location.GetStackIndex()));
5350     return temp;
5351   }
5352   // For register locations, check if the register was saved. If so, get it from the stack.
5353   // Note: There is a chance that the register was saved but not overwritten, so we could
5354   // save one load. However, since this is just an intrinsic slow path we prefer this
5355   // simple and more robust approach rather than trying to determine if that's the case.
5356   SlowPathCode* slow_path = GetCurrentSlowPath();
5357   DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
5358   if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
5359     int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
5360     __ movl(temp, Address(ESP, stack_offset));
5361     return temp;
5362   }
5363   return location.AsRegister<Register>();
5364 }
5365 
5366 void CodeGeneratorX86::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
5367   switch (load_kind) {
5368     case MethodLoadKind::kBootImageLinkTimePcRelative: {
5369       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5370       Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
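      // The 32-bit displacement is a placeholder; the recorded patch lets the
      // linker rewrite it so that base_reg plus the patched offset yields the
      // method's boot-image address.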
5371       __ leal(temp.AsRegister<Register>(),
5372               Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5373       RecordBootImageMethodPatch(invoke);
5374       break;
5375     }
5376     case MethodLoadKind::kBootImageRelRo: {
5377       size_t index = invoke->IsInvokeInterface()
5378           ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5379           : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5380       Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5381       __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5382       RecordBootImageRelRoPatch(
5383           invoke->InputAt(index)->AsX86ComputeBaseMethodAddress(),
5384           GetBootImageOffset(invoke));
5385       break;
5386     }
5387     case MethodLoadKind::kBssEntry: {
5388       Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5389       __ movl(temp.AsRegister<Register>(), Address(base_reg, kPlaceholder32BitOffset));
5390       RecordMethodBssEntryPatch(invoke);
5391       // No need for memory fence, thanks to the x86 memory model.
5392       break;
5393     }
5394     case MethodLoadKind::kJitDirectAddress: {
5395       __ movl(temp.AsRegister<Register>(),
5396               Immediate(reinterpret_cast32<uint32_t>(invoke->GetResolvedMethod())));
5397       break;
5398     }
5399     case MethodLoadKind::kRuntimeCall: {
5400       // Test situation, don't do anything.
5401       break;
5402     }
5403     default: {
5404       LOG(FATAL) << "Load kind should have already been handled " << load_kind;
5405       UNREACHABLE();
5406     }
5407   }
5408 }
5409 
5410 void CodeGeneratorX86::GenerateStaticOrDirectCall(
5411     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
5412   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
5413   switch (invoke->GetMethodLoadKind()) {
5414     case MethodLoadKind::kStringInit: {
5415       // temp = thread->string_init_entrypoint
5416       uint32_t offset =
5417           GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
5418       __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
5419       break;
5420     }
5421     case MethodLoadKind::kRecursive: {
5422       callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
5423       break;
5424     }
5425     case MethodLoadKind::kRuntimeCall: {
5426       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
5427       return;  // No code pointer retrieval; the runtime performs the call directly.
5428     }
5429     case MethodLoadKind::kBootImageLinkTimePcRelative:
5430       // For kCallCriticalNative we skip loading the method and do the call directly.
5431       if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
5432         break;
5433       }
5434       FALLTHROUGH_INTENDED;
5435     default: {
5436       LoadMethod(invoke->GetMethodLoadKind(), callee_method, invoke);
5437     }
5438   }
5439 
5440   switch (invoke->GetCodePtrLocation()) {
5441     case CodePtrLocation::kCallSelf:
5442       DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
5443       __ call(GetFrameEntryLabel());
5444       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5445       break;
5446     case CodePtrLocation::kCallCriticalNative: {
5447       size_t out_frame_size =
5448           PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86,
5449                                     kNativeStackAlignment,
5450                                     GetCriticalNativeDirectCallFrameSize>(invoke);
5451       if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
5452         DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
5453         Register base_reg = GetInvokeExtraParameter(invoke, temp.AsRegister<Register>());
5454         __ call(Address(base_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5455         RecordBootImageJniEntrypointPatch(invoke);
5456       } else {
5457         // (callee_method + offset_of_jni_entry_point)()
5458         __ call(Address(callee_method.AsRegister<Register>(),
5459                         ArtMethod::EntryPointFromJniOffset(kX86PointerSize).Int32Value()));
5460       }
5461       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5462       if (out_frame_size == 0u && DataType::IsFloatingPointType(invoke->GetType())) {
5463         // Create space for conversion.
5464         out_frame_size = 8u;
5465         IncreaseFrame(out_frame_size);
5466       }
5467       // Zero-/sign-extend or move the result when needed due to native and managed ABI mismatch.
5468       switch (invoke->GetType()) {
5469         case DataType::Type::kBool:
5470           __ movzxb(EAX, AL);
5471           break;
5472         case DataType::Type::kInt8:
5473           __ movsxb(EAX, AL);
5474           break;
5475         case DataType::Type::kUint16:
5476           __ movzxw(EAX, EAX);
5477           break;
5478         case DataType::Type::kInt16:
5479           __ movsxw(EAX, EAX);
5480           break;
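        // The native x86-32 ABI returns floating-point values on the x87 stack
        // (st(0)), while the managed ABI expects them in XMM0, so store the
        // value to the stack and reload it into XMM0.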
5481         case DataType::Type::kFloat32:
5482           __ fstps(Address(ESP, 0));
5483           __ movss(XMM0, Address(ESP, 0));
5484           break;
5485         case DataType::Type::kFloat64:
5486           __ fstpl(Address(ESP, 0));
5487           __ movsd(XMM0, Address(ESP, 0));
5488           break;
5489         case DataType::Type::kInt32:
5490         case DataType::Type::kInt64:
5491         case DataType::Type::kVoid:
5492           break;
5493         default:
5494           DCHECK(false) << invoke->GetType();
5495           break;
5496       }
5497       if (out_frame_size != 0u) {
5498         DecreaseFrame(out_frame_size);
5499       }
5500       break;
5501     }
5502     case CodePtrLocation::kCallArtMethod:
5503       // (callee_method + offset_of_quick_compiled_code)()
5504       __ call(Address(callee_method.AsRegister<Register>(),
5505                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
5506                           kX86PointerSize).Int32Value()));
5507       RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5508       break;
5509   }
5510 
5511   DCHECK(!IsLeafMethod());
5512 }
5513 
5514 void CodeGeneratorX86::GenerateVirtualCall(
5515     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
5516   Register temp = temp_in.AsRegister<Register>();
5517   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5518       invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
5519 
5520   // Use the calling convention instead of the location of the receiver, as
5521   // intrinsics may have put the receiver in a different register. In the intrinsics
5522   // slow path, the arguments have been moved to the right place, so here we are
5523   // guaranteed that the receiver is the first register of the calling convention.
5524   InvokeDexCallingConvention calling_convention;
5525   Register receiver = calling_convention.GetRegisterAt(0);
5526   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5527   // /* HeapReference<Class> */ temp = receiver->klass_
5528   __ movl(temp, Address(receiver, class_offset));
5529   MaybeRecordImplicitNullCheck(invoke);
5530   // Instead of simply (possibly) unpoisoning `temp` here, we should
5531   // emit a read barrier for the previous class reference load.
5532   // However this is not required in practice, as this is an
5533   // intermediate/temporary reference and because the current
5534   // concurrent copying collector keeps the from-space memory
5535   // intact/accessible until the end of the marking phase (though the
5536   // concurrent copying collector may not keep doing so in the future).
5537   __ MaybeUnpoisonHeapReference(temp);
5538 
5539   MaybeGenerateInlineCacheCheck(invoke, temp);
5540 
5541   // temp = temp->GetMethodAt(method_offset);
5542   __ movl(temp, Address(temp, method_offset));
5543   // call temp->GetEntryPoint();
5544   __ call(Address(
5545       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
5546   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5547 }
5548 
5549 void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
5550                                                      uint32_t intrinsic_data) {
5551   boot_image_other_patches_.emplace_back(
5552       method_address, /* target_dex_file= */ nullptr, intrinsic_data);
5553   __ Bind(&boot_image_other_patches_.back().label);
5554 }
5555 
5556 void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
5557                                                  uint32_t boot_image_offset) {
5558   boot_image_other_patches_.emplace_back(
5559       method_address, /* target_dex_file= */ nullptr, boot_image_offset);
5560   __ Bind(&boot_image_other_patches_.back().label);
5561 }
5562 
5563 void CodeGeneratorX86::RecordBootImageMethodPatch(HInvoke* invoke) {
5564   size_t index = invoke->IsInvokeInterface()
5565       ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5566       : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5567   HX86ComputeBaseMethodAddress* method_address =
5568       invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5569   boot_image_method_patches_.emplace_back(
5570       method_address,
5571       invoke->GetResolvedMethodReference().dex_file,
5572       invoke->GetResolvedMethodReference().index);
5573   __ Bind(&boot_image_method_patches_.back().label);
5574 }
5575 
5576 void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvoke* invoke) {
5577   size_t index = invoke->IsInvokeInterface()
5578       ? invoke->AsInvokeInterface()->GetSpecialInputIndex()
5579       : invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
5580   DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file) ||
5581          GetCompilerOptions().WithinOatFile(invoke->GetMethodReference().dex_file) ||
5582          ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
5583                          invoke->GetMethodReference().dex_file));
5584   HX86ComputeBaseMethodAddress* method_address =
5585       invoke->InputAt(index)->AsX86ComputeBaseMethodAddress();
5586   // Add the patch entry and bind its label at the end of the instruction.
5587   method_bss_entry_patches_.emplace_back(
5588       method_address,
5589       invoke->GetMethodReference().dex_file,
5590       invoke->GetMethodReference().index);
5591   __ Bind(&method_bss_entry_patches_.back().label);
5592 }
5593 
5594 void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) {
5595   HX86ComputeBaseMethodAddress* method_address =
5596       load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5597   boot_image_type_patches_.emplace_back(
5598       method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5599   __ Bind(&boot_image_type_patches_.back().label);
5600 }
5601 
5602 Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
5603   HX86ComputeBaseMethodAddress* method_address =
5604       load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5605   ArenaDeque<X86PcRelativePatchInfo>* patches = nullptr;
5606   switch (load_class->GetLoadKind()) {
5607     case HLoadClass::LoadKind::kBssEntry:
5608       patches = &type_bss_entry_patches_;
5609       break;
5610     case HLoadClass::LoadKind::kBssEntryPublic:
5611       patches = &public_type_bss_entry_patches_;
5612       break;
5613     case HLoadClass::LoadKind::kBssEntryPackage:
5614       patches = &package_type_bss_entry_patches_;
5615       break;
5616     default:
5617       LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
5618       UNREACHABLE();
5619   }
5620   patches->emplace_back(
5621       method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5622   return &patches->back().label;
5623 }
5624 
5625 void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) {
5626   HX86ComputeBaseMethodAddress* method_address =
5627       load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5628   boot_image_string_patches_.emplace_back(
5629       method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5630   __ Bind(&boot_image_string_patches_.back().label);
5631 }
5632 
5633 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
5634   HX86ComputeBaseMethodAddress* method_address =
5635       load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5636   string_bss_entry_patches_.emplace_back(
5637       method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5638   return &string_bss_entry_patches_.back().label;
5639 }
5640 
5641 void CodeGeneratorX86::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
5642   HX86ComputeBaseMethodAddress* method_address =
5643       invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5644   boot_image_jni_entrypoint_patches_.emplace_back(
5645       method_address,
5646       invoke->GetResolvedMethodReference().dex_file,
5647       invoke->GetResolvedMethodReference().index);
5648   __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
5649 }
5650 
5651 void CodeGeneratorX86::LoadBootImageAddress(Register reg,
5652                                             uint32_t boot_image_reference,
5653                                             HInvokeStaticOrDirect* invoke) {
5654   if (GetCompilerOptions().IsBootImage()) {
5655     HX86ComputeBaseMethodAddress* method_address =
5656         invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5657     DCHECK(method_address != nullptr);
5658     Register method_address_reg =
5659         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5660     __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5661     RecordBootImageIntrinsicPatch(method_address, boot_image_reference);
5662   } else if (GetCompilerOptions().GetCompilePic()) {
5663     HX86ComputeBaseMethodAddress* method_address =
5664         invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5665     DCHECK(method_address != nullptr);
5666     Register method_address_reg =
5667         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5668     __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5669     RecordBootImageRelRoPatch(method_address, boot_image_reference);
5670   } else {
5671     DCHECK(GetCompilerOptions().IsJitCompiler());
5672     gc::Heap* heap = Runtime::Current()->GetHeap();
5673     DCHECK(!heap->GetBootImageSpaces().empty());
5674     const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
5675     __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
5676   }
5677 }
5678 
5679 void CodeGeneratorX86::LoadIntrinsicDeclaringClass(Register reg, HInvokeStaticOrDirect* invoke) {
5680   DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
5681   if (GetCompilerOptions().IsBootImage()) {
5682     // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
5683     HX86ComputeBaseMethodAddress* method_address =
5684         invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5685     DCHECK(method_address != nullptr);
5686     Register method_address_reg =
5687         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5688     __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kPlaceholder32BitOffset));
5689     MethodReference target_method = invoke->GetResolvedMethodReference();
5690     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
5691     boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_);
5692     __ Bind(&boot_image_type_patches_.back().label);
5693   } else {
5694     uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
5695     LoadBootImageAddress(reg, boot_image_offset, invoke);
5696   }
5697 }
5698 
5699 // The label points to the end of the "movl" (or other) instruction, but the literal offset
5700 // for a method patch needs to point to the embedded constant, which occupies the last 4 bytes.
5701 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
5702 
5703 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
5704 inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
5705     const ArenaDeque<X86PcRelativePatchInfo>& infos,
5706     ArenaVector<linker::LinkerPatch>* linker_patches) {
5707   for (const X86PcRelativePatchInfo& info : infos) {
5708     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
5709     linker_patches->push_back(Factory(literal_offset,
5710                                       info.target_dex_file,
5711                                       GetMethodAddressOffset(info.method_address),
5712                                       info.offset_or_index));
5713   }
5714 }
5715 
5716 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
5717 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
5718                                      const DexFile* target_dex_file,
5719                                      uint32_t pc_insn_offset,
5720                                      uint32_t boot_image_offset) {
5721   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
5722   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
5723 }
5724 
5725 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
5726   DCHECK(linker_patches->empty());
5727   size_t size =
5728       boot_image_method_patches_.size() +
5729       method_bss_entry_patches_.size() +
5730       boot_image_type_patches_.size() +
5731       type_bss_entry_patches_.size() +
5732       public_type_bss_entry_patches_.size() +
5733       package_type_bss_entry_patches_.size() +
5734       boot_image_string_patches_.size() +
5735       string_bss_entry_patches_.size() +
5736       boot_image_jni_entrypoint_patches_.size() +
5737       boot_image_other_patches_.size();
5738   linker_patches->reserve(size);
5739   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
5740     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
5741         boot_image_method_patches_, linker_patches);
5742     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
5743         boot_image_type_patches_, linker_patches);
5744     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
5745         boot_image_string_patches_, linker_patches);
5746   } else {
5747     DCHECK(boot_image_method_patches_.empty());
5748     DCHECK(boot_image_type_patches_.empty());
5749     DCHECK(boot_image_string_patches_.empty());
5750   }
5751   if (GetCompilerOptions().IsBootImage()) {
5752     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
5753         boot_image_other_patches_, linker_patches);
5754   } else {
5755     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
5756         boot_image_other_patches_, linker_patches);
5757   }
5758   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
5759       method_bss_entry_patches_, linker_patches);
5760   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
5761       type_bss_entry_patches_, linker_patches);
5762   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
5763       public_type_bss_entry_patches_, linker_patches);
5764   EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
5765       package_type_bss_entry_patches_, linker_patches);
5766   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
5767       string_bss_entry_patches_, linker_patches);
5768   EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
5769       boot_image_jni_entrypoint_patches_, linker_patches);
5770   DCHECK_EQ(size, linker_patches->size());
5771 }
5772 
5773 void CodeGeneratorX86::MarkGCCard(
5774     Register temp, Register card, Register object, Register value, bool emit_null_check) {
5775   NearLabel is_null;
5776   if (emit_null_check) {
5777     __ testl(value, value);
5778     __ j(kEqual, &is_null);
5779   }
5780   // Load the address of the card table into `card`.
5781   __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
5782   // Calculate the offset (in the card table) of the card corresponding to
5783   // `object`.
5784   __ movl(temp, object);
5785   __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
5786   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5787   // `object`'s card.
5788   //
5789   // Register `card` contains the address of the card table. Note that the card
5790   // table's base is biased during its creation so that it always starts at an
5791   // address whose least-significant byte is equal to `kCardDirty` (see
5792   // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5793   // below writes the `kCardDirty` (byte) value into the `object`'s card
5794   // (located at `card + object >> kCardShift`).
5795   //
5796   // This dual use of the value in register `card` (1. to calculate the location
5797   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5798   // (no need to explicitly load `kCardDirty` as an immediate value).
5799   __ movb(Address(temp, card, TIMES_1, 0),
5800           X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
5801   if (emit_null_check) {
5802     __ Bind(&is_null);
5803   }
5804 }
5805 
5806 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
5807   DCHECK(instruction->IsInstanceFieldGet() ||
5808          instruction->IsStaticFieldGet() ||
5809          instruction->IsPredicatedInstanceFieldGet());
5810 
5811   bool object_field_get_with_read_barrier =
5812       gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5813   bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
5814   LocationSummary* locations =
5815       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5816                                                        gUseReadBarrier
5817                                                            ? LocationSummary::kCallOnSlowPath
5818                                                            : LocationSummary::kNoCall);
5819   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5820     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5821   }
5822   // receiver_input
5823   locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
5824   if (is_predicated) {
5825     if (DataType::IsFloatingPointType(instruction->GetType())) {
5826       locations->SetInAt(0, Location::RequiresFpuRegister());
5827     } else {
5828       locations->SetInAt(0, Location::RequiresRegister());
5829     }
5830   }
5831   if (DataType::IsFloatingPointType(instruction->GetType())) {
5832     locations->SetOut(is_predicated ? Location::SameAsFirstInput()
5833                                     : Location::RequiresFpuRegister());
5834   } else {
5835     // The output overlaps in case of long: we don't want the low move
5836     // to overwrite the object's location.  Likewise, in the case of
5837     // an object field get with read barriers enabled, we do not want
5838     // the move to overwrite the object's location, as we need it to emit
5839     // the read barrier.
5840     locations->SetOut(is_predicated ? Location::SameAsFirstInput() : Location::RequiresRegister(),
5841                       (object_field_get_with_read_barrier ||
5842                        instruction->GetType() == DataType::Type::kInt64 ||
5843                        is_predicated)
5844                           ? Location::kOutputOverlap
5845                           : Location::kNoOutputOverlap);
5846   }
5847 
5848   if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) {
5849     // Long values can be loaded atomically into an XMM using movsd.
5850     // So we use an XMM register as a temp to achieve atomicity (first
5851     // load the temp into the XMM and then copy the XMM into the
5852     // output, 32 bits at a time).
5853     locations->AddTemp(Location::RequiresFpuRegister());
5854   }
5855 }
5856 
5857 void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
5858                                                  const FieldInfo& field_info) {
5859   DCHECK(instruction->IsInstanceFieldGet() ||
5860          instruction->IsStaticFieldGet() ||
5861          instruction->IsPredicatedInstanceFieldGet());
5862 
5863   LocationSummary* locations = instruction->GetLocations();
5864   Location base_loc = locations->InAt(instruction->IsPredicatedInstanceFieldGet() ? 1 : 0);
5865   Register base = base_loc.AsRegister<Register>();
5866   Location out = locations->Out();
5867   bool is_volatile = field_info.IsVolatile();
5868   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5869   DataType::Type load_type = instruction->GetType();
5870   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5871 
5872   if (load_type == DataType::Type::kReference) {
5873     // /* HeapReference<Object> */ out = *(base + offset)
5874     if (gUseReadBarrier && kUseBakerReadBarrier) {
5875       // Note that a potential implicit null check is handled in this
5876       // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
5877       codegen_->GenerateFieldLoadWithBakerReadBarrier(
5878           instruction, out, base, offset, /* needs_null_check= */ true);
5879       if (is_volatile) {
5880         codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5881       }
5882     } else {
5883       __ movl(out.AsRegister<Register>(), Address(base, offset));
5884       codegen_->MaybeRecordImplicitNullCheck(instruction);
5885       if (is_volatile) {
5886         codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5887       }
5888       // If read barriers are enabled, emit read barriers other than
5889       // Baker's using a slow path (and also unpoison the loaded
5890       // reference, if heap poisoning is enabled).
5891       codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5892     }
5893   } else {
5894     Address src(base, offset);
5895     XmmRegister temp = (load_type == DataType::Type::kInt64 && is_volatile)
5896         ? locations->GetTemp(0).AsFpuRegister<XmmRegister>()
5897         : kNoXmmRegister;
5898     codegen_->LoadFromMemoryNoBarrier(load_type, out, src, instruction, temp, is_volatile);
5899     if (is_volatile) {
5900       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5901     }
5902   }
5903 }
5904 
5905 void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction,
5906                                          const FieldInfo& field_info,
5907                                          WriteBarrierKind write_barrier_kind) {
5908   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5909 
5910   LocationSummary* locations =
5911       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5912   locations->SetInAt(0, Location::RequiresRegister());
5913   bool is_volatile = field_info.IsVolatile();
5914   DataType::Type field_type = field_info.GetFieldType();
5915   bool is_byte_type = DataType::Size(field_type) == 1u;
5916 
5917   // The register allocator does not support multiple
5918   // inputs that die at entry with one in a specific register.
5919   if (is_byte_type) {
5920     // Ensure the value is in a byte register.
5921     locations->SetInAt(1, Location::RegisterLocation(EAX));
5922   } else if (DataType::IsFloatingPointType(field_type)) {
5923     if (is_volatile && field_type == DataType::Type::kFloat64) {
5924       // In order to satisfy the semantics of volatile, this must be a single instruction store.
5925       locations->SetInAt(1, Location::RequiresFpuRegister());
5926     } else {
5927       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5928     }
5929   } else if (is_volatile && field_type == DataType::Type::kInt64) {
5930     // In order to satisfy the semantics of volatile, this must be a single instruction store.
5931     locations->SetInAt(1, Location::RequiresRegister());
5932 
5933     // A 64-bit value can be written atomically to an address with movsd and an XMM register.
5934     // We need two XMM registers because there's no easier way to (bit) copy a register pair
5935     // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
5936     // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the
5937     // isolated cases when we need this it isn't worth adding the extra complexity.
5938     locations->AddTemp(Location::RequiresFpuRegister());
5939     locations->AddTemp(Location::RequiresFpuRegister());
5940   } else {
5941     locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5942 
5943     if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
5944       if (write_barrier_kind != WriteBarrierKind::kDontEmit) {
5945         locations->AddTemp(Location::RequiresRegister());
5946         // Ensure the card is in a byte register.
5947         locations->AddTemp(Location::RegisterLocation(ECX));
5948       } else if (kPoisonHeapReferences) {
5949         locations->AddTemp(Location::RequiresRegister());
5950       }
5951     }
5952   }
5953 }
5954 
5955 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
5956                                                  uint32_t value_index,
5957                                                  DataType::Type field_type,
5958                                                  Address field_addr,
5959                                                  Register base,
5960                                                  bool is_volatile,
5961                                                  bool value_can_be_null,
5962                                                  WriteBarrierKind write_barrier_kind) {
5963   LocationSummary* locations = instruction->GetLocations();
5964   Location value = locations->InAt(value_index);
5965   bool needs_write_barrier =
5966       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index));
5967 
5968   if (is_volatile) {
5969     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5970   }
5971 
5972   bool maybe_record_implicit_null_check_done = false;
5973 
5974   switch (field_type) {
5975     case DataType::Type::kBool:
5976     case DataType::Type::kUint8:
5977     case DataType::Type::kInt8: {
5978       if (value.IsConstant()) {
5979         __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5980       } else {
5981         __ movb(field_addr, value.AsRegister<ByteRegister>());
5982       }
5983       break;
5984     }
5985 
5986     case DataType::Type::kUint16:
5987     case DataType::Type::kInt16: {
5988       if (value.IsConstant()) {
5989         __ movw(field_addr, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5990       } else {
5991         __ movw(field_addr, value.AsRegister<Register>());
5992       }
5993       break;
5994     }
5995 
5996     case DataType::Type::kInt32:
5997     case DataType::Type::kReference: {
5998       if (kPoisonHeapReferences && needs_write_barrier) {
5999         // Note that in the case where `value` is a null reference,
6000         // we do not enter this block, as the reference does not
6001         // need poisoning.
6002         DCHECK_EQ(field_type, DataType::Type::kReference);
6003         Register temp = locations->GetTemp(0).AsRegister<Register>();
6004         __ movl(temp, value.AsRegister<Register>());
6005         __ PoisonHeapReference(temp);
6006         __ movl(field_addr, temp);
6007       } else if (value.IsConstant()) {
6008         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6009         __ movl(field_addr, Immediate(v));
6010       } else {
6011         DCHECK(value.IsRegister()) << value;
6012         __ movl(field_addr, value.AsRegister<Register>());
6013       }
6014       break;
6015     }
6016 
6017     case DataType::Type::kInt64: {
6018       if (is_volatile) {
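        // Pack the register pair into one XMM register (movd the two halves,
        // then interleave them with punpckldq) so the 64-bit value can be
        // stored with a single, atomic movsd.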
6019         XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
6020         XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
6021         __ movd(temp1, value.AsRegisterPairLow<Register>());
6022         __ movd(temp2, value.AsRegisterPairHigh<Register>());
6023         __ punpckldq(temp1, temp2);
6024         __ movsd(field_addr, temp1);
6025         codegen_->MaybeRecordImplicitNullCheck(instruction);
6026       } else if (value.IsConstant()) {
6027         int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
6028         __ movl(field_addr, Immediate(Low32Bits(v)));
6029         codegen_->MaybeRecordImplicitNullCheck(instruction);
6030         __ movl(Address::displace(field_addr, kX86WordSize), Immediate(High32Bits(v)));
6031       } else {
6032         __ movl(field_addr, value.AsRegisterPairLow<Register>());
6033         codegen_->MaybeRecordImplicitNullCheck(instruction);
6034         __ movl(Address::displace(field_addr, kX86WordSize), value.AsRegisterPairHigh<Register>());
6035       }
6036       maybe_record_implicit_null_check_done = true;
6037       break;
6038     }
6039 
6040     case DataType::Type::kFloat32: {
6041       if (value.IsConstant()) {
6042         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6043         __ movl(field_addr, Immediate(v));
6044       } else {
6045         __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
6046       }
6047       break;
6048     }
6049 
6050     case DataType::Type::kFloat64: {
6051       if (value.IsConstant()) {
6052         DCHECK(!is_volatile);
6053         int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
6054         __ movl(field_addr, Immediate(Low32Bits(v)));
6055         codegen_->MaybeRecordImplicitNullCheck(instruction);
6056         __ movl(Address::displace(field_addr, kX86WordSize), Immediate(High32Bits(v)));
6057         maybe_record_implicit_null_check_done = true;
6058       } else {
6059         __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
6060       }
6061       break;
6062     }
6063 
6064     case DataType::Type::kUint32:
6065     case DataType::Type::kUint64:
6066     case DataType::Type::kVoid:
6067       LOG(FATAL) << "Unreachable type " << field_type;
6068       UNREACHABLE();
6069   }
6070 
6071   if (!maybe_record_implicit_null_check_done) {
6072     codegen_->MaybeRecordImplicitNullCheck(instruction);
6073   }
6074 
6075   if (needs_write_barrier && write_barrier_kind != WriteBarrierKind::kDontEmit) {
6076     Register temp = locations->GetTemp(0).AsRegister<Register>();
6077     Register card = locations->GetTemp(1).AsRegister<Register>();
6078     codegen_->MarkGCCard(
6079         temp,
6080         card,
6081         base,
6082         value.AsRegister<Register>(),
6083         value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck);
6084   }
6085 
6086   if (is_volatile) {
6087     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
6088   }
6089 }
6090 
6091 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
6092                                                  const FieldInfo& field_info,
6093                                                  bool value_can_be_null,
6094                                                  WriteBarrierKind write_barrier_kind) {
6095   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
6096 
6097   LocationSummary* locations = instruction->GetLocations();
6098   Register base = locations->InAt(0).AsRegister<Register>();
6099   bool is_volatile = field_info.IsVolatile();
6100   DataType::Type field_type = field_info.GetFieldType();
6101   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
6102   bool is_predicated =
6103       instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
6104 
6105   Address field_addr(base, offset);
6106 
6107   NearLabel pred_is_null;
6108   if (is_predicated) {
6109     __ testl(base, base);
6110     __ j(kEqual, &pred_is_null);
6111   }
6112 
6113   HandleFieldSet(instruction,
6114                  /* value_index= */ 1,
6115                  field_type,
6116                  field_addr,
6117                  base,
6118                  is_volatile,
6119                  value_can_be_null,
6120                  write_barrier_kind);
6121 
6122   if (is_predicated) {
6123     __ Bind(&pred_is_null);
6124   }
6125 }
6126 
6127 void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6128   HandleFieldGet(instruction, instruction->GetFieldInfo());
6129 }
6130 
6131 void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
6132   HandleFieldGet(instruction, instruction->GetFieldInfo());
6133 }
6134 
6135 void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6136   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6137 }
6138 
6139 void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
6140   HandleFieldSet(instruction,
6141                  instruction->GetFieldInfo(),
6142                  instruction->GetValueCanBeNull(),
6143                  instruction->GetWriteBarrierKind());
6144 }
6145 
6146 void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6147   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
6148 }
6149 
6150 void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
6151   HandleFieldSet(instruction,
6152                  instruction->GetFieldInfo(),
6153                  instruction->GetValueCanBeNull(),
6154                  instruction->GetWriteBarrierKind());
6155 }
6156 
6157 void LocationsBuilderX86::VisitPredicatedInstanceFieldGet(
6158     HPredicatedInstanceFieldGet* instruction) {
6159   HandleFieldGet(instruction, instruction->GetFieldInfo());
6160 }
6161 
6162 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6163   HandleFieldGet(instruction, instruction->GetFieldInfo());
6164 }
6165 
6166 void InstructionCodeGeneratorX86::VisitPredicatedInstanceFieldGet(
6167     HPredicatedInstanceFieldGet* instruction) {
6168   NearLabel finish;
6169   LocationSummary* locations = instruction->GetLocations();
6170   Register recv = locations->InAt(1).AsRegister<Register>();
6171   __ testl(recv, recv);
6172   __ j(kZero, &finish);
6173   HandleFieldGet(instruction, instruction->GetFieldInfo());
6174   __ Bind(&finish);
6175 }
6176 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
6177   HandleFieldGet(instruction, instruction->GetFieldInfo());
6178 }
6179 
6180 void LocationsBuilderX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
6181   codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(EAX));
6182 }
6183 
6184 void InstructionCodeGeneratorX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
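       // Pass the format value in EAX (the first runtime argument register) and call the entrypoint.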
6185   __ movl(EAX, Immediate(instruction->GetFormat()->GetValue()));
6186   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
6187 }
6188 
6189 void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
6190     HUnresolvedInstanceFieldGet* instruction) {
6191   FieldAccessCallingConventionX86 calling_convention;
6192   codegen_->CreateUnresolvedFieldLocationSummary(
6193       instruction, instruction->GetFieldType(), calling_convention);
6194 }
6195 
6196 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet(
6197     HUnresolvedInstanceFieldGet* instruction) {
6198   FieldAccessCallingConventionX86 calling_convention;
6199   codegen_->GenerateUnresolvedFieldAccess(instruction,
6200                                           instruction->GetFieldType(),
6201                                           instruction->GetFieldIndex(),
6202                                           instruction->GetDexPc(),
6203                                           calling_convention);
6204 }
6205 
6206 void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet(
6207     HUnresolvedInstanceFieldSet* instruction) {
6208   FieldAccessCallingConventionX86 calling_convention;
6209   codegen_->CreateUnresolvedFieldLocationSummary(
6210       instruction, instruction->GetFieldType(), calling_convention);
6211 }
6212 
6213 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet(
6214     HUnresolvedInstanceFieldSet* instruction) {
6215   FieldAccessCallingConventionX86 calling_convention;
6216   codegen_->GenerateUnresolvedFieldAccess(instruction,
6217                                           instruction->GetFieldType(),
6218                                           instruction->GetFieldIndex(),
6219                                           instruction->GetDexPc(),
6220                                           calling_convention);
6221 }
6222 
6223 void LocationsBuilderX86::VisitUnresolvedStaticFieldGet(
6224     HUnresolvedStaticFieldGet* instruction) {
6225   FieldAccessCallingConventionX86 calling_convention;
6226   codegen_->CreateUnresolvedFieldLocationSummary(
6227       instruction, instruction->GetFieldType(), calling_convention);
6228 }
6229 
6230 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet(
6231     HUnresolvedStaticFieldGet* instruction) {
6232   FieldAccessCallingConventionX86 calling_convention;
6233   codegen_->GenerateUnresolvedFieldAccess(instruction,
6234                                           instruction->GetFieldType(),
6235                                           instruction->GetFieldIndex(),
6236                                           instruction->GetDexPc(),
6237                                           calling_convention);
6238 }
6239 
6240 void LocationsBuilderX86::VisitUnresolvedStaticFieldSet(
6241     HUnresolvedStaticFieldSet* instruction) {
6242   FieldAccessCallingConventionX86 calling_convention;
6243   codegen_->CreateUnresolvedFieldLocationSummary(
6244       instruction, instruction->GetFieldType(), calling_convention);
6245 }
6246 
6247 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
6248     HUnresolvedStaticFieldSet* instruction) {
6249   FieldAccessCallingConventionX86 calling_convention;
6250   codegen_->GenerateUnresolvedFieldAccess(instruction,
6251                                           instruction->GetFieldType(),
6252                                           instruction->GetFieldIndex(),
6253                                           instruction->GetDexPc(),
6254                                           calling_convention);
6255 }
6256 
6257 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
6258   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
6259   Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
6260       ? Location::RequiresRegister()
6261       : Location::Any();
6262   locations->SetInAt(0, loc);
6263 }
6264 
6265 void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
6266   if (CanMoveNullCheckToUser(instruction)) {
6267     return;
6268   }
6269   LocationSummary* locations = instruction->GetLocations();
6270   Location obj = locations->InAt(0);
6271 
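       // An implicit null check is a probing load from the object at offset 0: a null
       // object faults, and the runtime's fault handler raises the NullPointerException.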
6272   __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
6273   RecordPcInfo(instruction, instruction->GetDexPc());
6274 }
6275 
6276 void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
6277   SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86(instruction);
6278   AddSlowPath(slow_path);
6279 
6280   LocationSummary* locations = instruction->GetLocations();
6281   Location obj = locations->InAt(0);
6282 
6283   if (obj.IsRegister()) {
6284     __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
6285   } else if (obj.IsStackSlot()) {
6286     __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
6287   } else {
6288     DCHECK(obj.IsConstant()) << obj;
6289     DCHECK(obj.GetConstant()->IsNullConstant());
6290     __ jmp(slow_path->GetEntryLabel());
6291     return;
6292   }
6293   __ j(kEqual, slow_path->GetEntryLabel());
6294 }
6295 
6296 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
6297   codegen_->GenerateNullCheck(instruction);
6298 }
6299 
6300 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
6301   bool object_array_get_with_read_barrier =
6302       gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
6303   LocationSummary* locations =
6304       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
6305                                                        object_array_get_with_read_barrier
6306                                                            ? LocationSummary::kCallOnSlowPath
6307                                                            : LocationSummary::kNoCall);
6308   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
6309     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6310   }
6311   locations->SetInAt(0, Location::RequiresRegister());
6312   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6313   if (DataType::IsFloatingPointType(instruction->GetType())) {
6314     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
6315   } else {
6316     // The output overlaps in the case of a long: we don't want the low move
6317     // to overwrite the array's location.  Likewise, in the case of an
6318     // object array get with read barriers enabled, we do not want the
6319     // move to overwrite the array's location, as we need it to emit
6320     // the read barrier.
6321     locations->SetOut(
6322         Location::RequiresRegister(),
6323         (instruction->GetType() == DataType::Type::kInt64 || object_array_get_with_read_barrier)
6324             ? Location::kOutputOverlap
6325             : Location::kNoOutputOverlap);
6326   }
6327 }
6328 
6329 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
6330   LocationSummary* locations = instruction->GetLocations();
6331   Location obj_loc = locations->InAt(0);
6332   Register obj = obj_loc.AsRegister<Register>();
6333   Location index = locations->InAt(1);
6334   Location out_loc = locations->Out();
6335   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
6336 
6337   DataType::Type type = instruction->GetType();
6338   if (type == DataType::Type::kReference) {
6339     static_assert(
6340         sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6341         "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6342     // /* HeapReference<Object> */ out =
6343     //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
6344     if (gUseReadBarrier && kUseBakerReadBarrier) {
6345       // Note that a potential implicit null check is handled in this
6346       // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
6347       codegen_->GenerateArrayLoadWithBakerReadBarrier(
6348           instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
6349     } else {
6350       Register out = out_loc.AsRegister<Register>();
6351       __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
6352       codegen_->MaybeRecordImplicitNullCheck(instruction);
6353       // If read barriers are enabled, emit read barriers other than
6354       // Baker's using a slow path (and also unpoison the loaded
6355       // reference, if heap poisoning is enabled).
6356       if (index.IsConstant()) {
6357         uint32_t offset =
6358             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
6359         codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
6360       } else {
6361         codegen_->MaybeGenerateReadBarrierSlow(
6362             instruction, out_loc, out_loc, obj_loc, data_offset, index);
6363       }
6364     }
6365   } else if (type == DataType::Type::kUint16
6366       && mirror::kUseStringCompression
6367       && instruction->IsStringCharAt()) {
6368     // Branch into the compressed and uncompressed cases based on the string's compression flag.
6369     Register out = out_loc.AsRegister<Register>();
6370     uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
6371     NearLabel done, not_compressed;
6372     __ testb(Address(obj, count_offset), Immediate(1));
6373     codegen_->MaybeRecordImplicitNullCheck(instruction);
6374     static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
6375                   "Expecting 0=compressed, 1=uncompressed");
6376     __ j(kNotZero, &not_compressed);
6377     __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
6378     __ jmp(&done);
6379     __ Bind(&not_compressed);
6380     __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
6381     __ Bind(&done);
6382   } else {
6383     ScaleFactor scale = CodeGenerator::ScaleFactorForType(type);
6384     Address src = CodeGeneratorX86::ArrayAddress(obj, index, scale, data_offset);
6385     codegen_->LoadFromMemoryNoBarrier(type, out_loc, src, instruction);
6386   }
6387 }
6388 
6389 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
6390   DataType::Type value_type = instruction->GetComponentType();
6391 
6392   bool needs_write_barrier =
6393       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6394   bool needs_type_check = instruction->NeedsTypeCheck();
6395 
6396   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6397       instruction,
6398       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
6399 
6400   bool is_byte_type = DataType::Size(value_type) == 1u;
6401   // We need the inputs to be different from the output in the case of a long operation.
6402   // In case of a byte operation, the register allocator does not support multiple
6403   // inputs that die at entry with one in a specific register.
6404   locations->SetInAt(0, Location::RequiresRegister());
6405   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6406   if (is_byte_type) {
6407     // Ensure the value is in a byte register.
6408     locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
6409   } else if (DataType::IsFloatingPointType(value_type)) {
6410     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
6411   } else {
6412     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
6413   }
6414   if (needs_write_barrier) {
6415     // Used by reference poisoning or emitting write barrier.
6416     locations->AddTemp(Location::RequiresRegister());
6417     if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
6418       // Only used when emitting a write barrier. Ensure the card is in a byte register.
6419       locations->AddTemp(Location::RegisterLocation(ECX));
6420     }
6421   }
6422 }
6423 
6424 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
6425   LocationSummary* locations = instruction->GetLocations();
6426   Location array_loc = locations->InAt(0);
6427   Register array = array_loc.AsRegister<Register>();
6428   Location index = locations->InAt(1);
6429   Location value = locations->InAt(2);
6430   DataType::Type value_type = instruction->GetComponentType();
6431   bool needs_type_check = instruction->NeedsTypeCheck();
6432   bool needs_write_barrier =
6433       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
6434 
6435   switch (value_type) {
6436     case DataType::Type::kBool:
6437     case DataType::Type::kUint8:
6438     case DataType::Type::kInt8: {
6439       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
6440       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
6441       if (value.IsRegister()) {
6442         __ movb(address, value.AsRegister<ByteRegister>());
6443       } else {
6444         __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
6445       }
6446       codegen_->MaybeRecordImplicitNullCheck(instruction);
6447       break;
6448     }
6449 
6450     case DataType::Type::kUint16:
6451     case DataType::Type::kInt16: {
6452       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
6453       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
6454       if (value.IsRegister()) {
6455         __ movw(address, value.AsRegister<Register>());
6456       } else {
6457         __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
6458       }
6459       codegen_->MaybeRecordImplicitNullCheck(instruction);
6460       break;
6461     }
6462 
6463     case DataType::Type::kReference: {
6464       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6465       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6466 
6467       if (!value.IsRegister()) {
6468         // Just setting null.
6469         DCHECK(instruction->InputAt(2)->IsNullConstant());
6470         DCHECK(value.IsConstant()) << value;
6471         __ movl(address, Immediate(0));
6472         codegen_->MaybeRecordImplicitNullCheck(instruction);
6473         DCHECK(!needs_write_barrier);
6474         DCHECK(!needs_type_check);
6475         break;
6476       }
6477 
6478       DCHECK(needs_write_barrier);
6479       Register register_value = value.AsRegister<Register>();
6480       Location temp_loc = locations->GetTemp(0);
6481       Register temp = temp_loc.AsRegister<Register>();
6482 
6483       bool can_value_be_null = instruction->GetValueCanBeNull();
6484       NearLabel do_store;
6485       if (can_value_be_null) {
6486         __ testl(register_value, register_value);
6487         __ j(kEqual, &do_store);
6488       }
6489 
6490       SlowPathCode* slow_path = nullptr;
6491       if (needs_type_check) {
6492         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction);
6493         codegen_->AddSlowPath(slow_path);
6494 
6495         const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6496         const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6497         const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6498 
6499         // Note that when Baker read barriers are enabled, the type
6500         // checks are performed without read barriers.  This is fine,
6501         // even in the case where a class object is in the from-space
6502         // after the flip, as a comparison involving such a type would
6503         // not produce a false positive; it may of course produce a
6504         // false negative, in which case we would take the ArraySet
6505         // slow path.
6506 
6507         // /* HeapReference<Class> */ temp = array->klass_
6508         __ movl(temp, Address(array, class_offset));
6509         codegen_->MaybeRecordImplicitNullCheck(instruction);
6510         __ MaybeUnpoisonHeapReference(temp);
6511 
6512         // /* HeapReference<Class> */ temp = temp->component_type_
6513         __ movl(temp, Address(temp, component_offset));
6514         // If heap poisoning is enabled, no need to unpoison `temp`
6515         // nor the object reference in `register_value->klass`, as
6516         // we are comparing two poisoned references.
6517         __ cmpl(temp, Address(register_value, class_offset));
6518 
6519         if (instruction->StaticTypeOfArrayIsObjectArray()) {
6520           NearLabel do_put;
6521           __ j(kEqual, &do_put);
6522           // If heap poisoning is enabled, the `temp` reference has
6523           // not been unpoisoned yet; unpoison it now.
6524           __ MaybeUnpoisonHeapReference(temp);
6525 
6526           // If heap poisoning is enabled, no need to unpoison the
6527           // heap reference loaded below, as it is only used for a
6528           // comparison with null.
6529           __ cmpl(Address(temp, super_offset), Immediate(0));
6530           __ j(kNotEqual, slow_path->GetEntryLabel());
6531           __ Bind(&do_put);
6532         } else {
6533           __ j(kNotEqual, slow_path->GetEntryLabel());
6534         }
6535       }
6536 
6537       if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
6538         DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck)
6539             << " Already null checked so we shouldn't do it again.";
6540         Register card = locations->GetTemp(1).AsRegister<Register>();
6541         codegen_->MarkGCCard(temp,
6542                              card,
6543                              array,
6544                              value.AsRegister<Register>(),
6545                              /* emit_null_check= */ false);
6546       }
6547 
6548       if (can_value_be_null) {
6549         DCHECK(do_store.IsLinked());
6550         __ Bind(&do_store);
6551       }
6552 
6553       Register source = register_value;
6554       if (kPoisonHeapReferences) {
6555         __ movl(temp, register_value);
6556         __ PoisonHeapReference(temp);
6557         source = temp;
6558       }
6559 
6560       __ movl(address, source);
6561 
6562       if (can_value_be_null || !needs_type_check) {
6563         codegen_->MaybeRecordImplicitNullCheck(instruction);
6564       }
6565 
6566       if (slow_path != nullptr) {
6567         __ Bind(slow_path->GetExitLabel());
6568       }
6569 
6570       break;
6571     }
6572 
6573     case DataType::Type::kInt32: {
6574       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6575       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6576       if (value.IsRegister()) {
6577         __ movl(address, value.AsRegister<Register>());
6578       } else {
6579         DCHECK(value.IsConstant()) << value;
6580         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6581         __ movl(address, Immediate(v));
6582       }
6583       codegen_->MaybeRecordImplicitNullCheck(instruction);
6584       break;
6585     }
6586 
6587     case DataType::Type::kInt64: {
6588       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6589       if (value.IsRegisterPair()) {
6590         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6591                 value.AsRegisterPairLow<Register>());
6592         codegen_->MaybeRecordImplicitNullCheck(instruction);
6593         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6594                 value.AsRegisterPairHigh<Register>());
6595       } else {
6596         DCHECK(value.IsConstant());
6597         int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
6598         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6599                 Immediate(Low32Bits(val)));
6600         codegen_->MaybeRecordImplicitNullCheck(instruction);
6601         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6602                 Immediate(High32Bits(val)));
6603       }
6604       break;
6605     }
6606 
6607     case DataType::Type::kFloat32: {
6608       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6609       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6610       if (value.IsFpuRegister()) {
6611         __ movss(address, value.AsFpuRegister<XmmRegister>());
6612       } else {
6613         DCHECK(value.IsConstant());
6614         int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6615         __ movl(address, Immediate(v));
6616       }
6617       codegen_->MaybeRecordImplicitNullCheck(instruction);
6618       break;
6619     }
6620 
6621     case DataType::Type::kFloat64: {
6622       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6623       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset);
6624       if (value.IsFpuRegister()) {
6625         __ movsd(address, value.AsFpuRegister<XmmRegister>());
6626       } else {
6627         DCHECK(value.IsConstant());
6628         Address address_hi =
6629             CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize);
6630         int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6631         __ movl(address, Immediate(Low32Bits(v)));
6632         codegen_->MaybeRecordImplicitNullCheck(instruction);
6633         __ movl(address_hi, Immediate(High32Bits(v)));
6634       }
6635       break;
6636     }
6637 
6638     case DataType::Type::kUint32:
6639     case DataType::Type::kUint64:
6640     case DataType::Type::kVoid:
6641       LOG(FATAL) << "Unreachable type " << instruction->GetType();
6642       UNREACHABLE();
6643   }
6644 }
6645 
6646 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
6647   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6648   locations->SetInAt(0, Location::RequiresRegister());
6649   if (!instruction->IsEmittedAtUseSite()) {
6650     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6651   }
6652 }
6653 
6654 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
6655   if (instruction->IsEmittedAtUseSite()) {
6656     return;
6657   }
6658 
6659   LocationSummary* locations = instruction->GetLocations();
6660   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6661   Register obj = locations->InAt(0).AsRegister<Register>();
6662   Register out = locations->Out().AsRegister<Register>();
6663   __ movl(out, Address(obj, offset));
6664   codegen_->MaybeRecordImplicitNullCheck(instruction);
6665   // Mask out most significant bit in case the array is String's array of char.
6666   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6667     __ shrl(out, Immediate(1));
6668   }
6669 }
6670 
6671 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6672   RegisterSet caller_saves = RegisterSet::Empty();
6673   InvokeRuntimeCallingConvention calling_convention;
6674   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6675   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6676   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6677   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6678   HInstruction* length = instruction->InputAt(1);
6679   if (!length->IsEmittedAtUseSite()) {
6680     locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6681   }
6682   // Need a temporary register to hold the string's length when string compression is enabled.
6683   if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6684     locations->AddTemp(Location::RequiresRegister());
6685   }
6686 }
6687 
6688 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6689   const bool is_string_compressed_char_at =
6690       mirror::kUseStringCompression && instruction->IsStringCharAt();
6691   LocationSummary* locations = instruction->GetLocations();
6692   Location index_loc = locations->InAt(0);
6693   Location length_loc = locations->InAt(1);
6694   SlowPathCode* slow_path =
6695     new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86(instruction);
6696 
6697   if (length_loc.IsConstant()) {
6698     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6699     if (index_loc.IsConstant()) {
6700       // BCE will remove the bounds check if we are guaranteed to pass.
6701       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6702       if (index < 0 || index >= length) {
6703         codegen_->AddSlowPath(slow_path);
6704         __ jmp(slow_path->GetEntryLabel());
6705       } else {
6706         // Some optimization after BCE may have generated this, and we should not
6707         // generate a bounds check if it is a valid range.
6708       }
6709       return;
6710     }
6711 
6712     // We have to reverse the jump condition because the length is the constant.
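         // The unsigned "index >= length" comparison also catches negative indices.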
6713     Register index_reg = index_loc.AsRegister<Register>();
6714     __ cmpl(index_reg, Immediate(length));
6715     codegen_->AddSlowPath(slow_path);
6716     __ j(kAboveEqual, slow_path->GetEntryLabel());
6717   } else {
6718     HInstruction* array_length = instruction->InputAt(1);
6719     if (array_length->IsEmittedAtUseSite()) {
6720       // Address the length field in the array.
6721       DCHECK(array_length->IsArrayLength());
6722       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6723       Location array_loc = array_length->GetLocations()->InAt(0);
6724       Address array_len(array_loc.AsRegister<Register>(), len_offset);
6725       if (is_string_compressed_char_at) {
6726         // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6727         // the string compression flag) with the in-memory length and avoid the temporary.
6728         Register length_reg = locations->GetTemp(0).AsRegister<Register>();
6729         __ movl(length_reg, array_len);
6730         codegen_->MaybeRecordImplicitNullCheck(array_length);
6731         __ shrl(length_reg, Immediate(1));
6732         codegen_->GenerateIntCompare(length_reg, index_loc);
6733       } else {
6734         // Checking bounds for general case:
6735         // Array of char, or a String's value array with string compression disabled.
6736         if (index_loc.IsConstant()) {
6737           int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6738           __ cmpl(array_len, Immediate(value));
6739         } else {
6740           __ cmpl(array_len, index_loc.AsRegister<Register>());
6741         }
6742         codegen_->MaybeRecordImplicitNullCheck(array_length);
6743       }
6744     } else {
6745       codegen_->GenerateIntCompare(length_loc, index_loc);
6746     }
6747     codegen_->AddSlowPath(slow_path);
6748     __ j(kBelowEqual, slow_path->GetEntryLabel());
6749   }
6750 }
6751 
6752 void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
6753   LOG(FATAL) << "Unreachable";
6754 }
6755 
6756 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
6757   if (instruction->GetNext()->IsSuspendCheck() &&
6758       instruction->GetBlock()->GetLoopInformation() != nullptr) {
6759     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6760     // The back edge will generate the suspend check.
6761     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6762   }
6763 
6764   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6765 }
6766 
6767 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6768   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6769       instruction, LocationSummary::kCallOnSlowPath);
6770   // In the suspend check slow path, usually there are no caller-save registers at all.
6771   // If SIMD instructions are present, however, we force spilling all live SIMD
6772   // registers in full width (since the runtime only saves/restores lower part).
6773   locations->SetCustomSlowPathCallerSaves(
6774       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6775 }
6776 
6777 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6778   HBasicBlock* block = instruction->GetBlock();
6779   if (block->GetLoopInformation() != nullptr) {
6780     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6781     // The back edge will generate the suspend check.
6782     return;
6783   }
6784   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6785     // The goto will generate the suspend check.
6786     return;
6787   }
6788   GenerateSuspendCheck(instruction, nullptr);
6789 }
6790 
6791 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
6792                                                        HBasicBlock* successor) {
6793   SuspendCheckSlowPathX86* slow_path =
6794       down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
6795   if (slow_path == nullptr) {
6796     slow_path =
6797         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86(instruction, successor);
6798     instruction->SetSlowPath(slow_path);
6799     codegen_->AddSlowPath(slow_path);
6800     if (successor != nullptr) {
6801       DCHECK(successor->IsLoopHeader());
6802     }
6803   } else {
6804     DCHECK_EQ(slow_path->GetSuccessor(), successor);
6805   }
6806 
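       // Test the thread's flags word (thread-local, via the FS segment); any pending
       // suspend or checkpoint request diverts execution to the slow path.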
6807   __ fs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
6808                  Immediate(Thread::SuspendOrCheckpointRequestFlags()));
6809   if (successor == nullptr) {
6810     __ j(kNotZero, slow_path->GetEntryLabel());
6811     __ Bind(slow_path->GetReturnLabel());
6812   } else {
6813     __ j(kZero, codegen_->GetLabelOf(successor));
6814     __ jmp(slow_path->GetEntryLabel());
6815   }
6816 }
6817 
6818 X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
6819   return codegen_->GetAssembler();
6820 }
6821 
6822 void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) {
6823   ScratchRegisterScope ensure_scratch(
6824       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6825   Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
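       // If the scratch register had to be spilled, it was pushed on the stack, so
       // ESP-relative source and destination offsets shift by one word.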
6826   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6827 
6828   // Now that the temp register is available (possibly spilled), move the blocks of memory.
6829   for (int i = 0; i < number_of_words; i++) {
6830     __ movl(temp_reg, Address(ESP, src + stack_offset));
6831     __ movl(Address(ESP, dst + stack_offset), temp_reg);
6832     stack_offset += kX86WordSize;
6833   }
6834 }
6835 
6836 void ParallelMoveResolverX86::EmitMove(size_t index) {
6837   MoveOperands* move = moves_[index];
6838   Location source = move->GetSource();
6839   Location destination = move->GetDestination();
6840 
6841   if (source.IsRegister()) {
6842     if (destination.IsRegister()) {
6843       __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6844     } else if (destination.IsFpuRegister()) {
6845       __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
6846     } else {
6847       DCHECK(destination.IsStackSlot());
6848       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
6849     }
6850   } else if (source.IsRegisterPair()) {
6851     if (destination.IsRegisterPair()) {
6852       __ movl(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
6853       DCHECK_NE(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairHigh<Register>());
6854       __ movl(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
6855     } else if (destination.IsFpuRegister()) {
6856       size_t elem_size = DataType::Size(DataType::Type::kInt32);
6857       // Push the 2 source registers to the stack.
6858       __ pushl(source.AsRegisterPairHigh<Register>());
6859       __ cfi().AdjustCFAOffset(elem_size);
6860       __ pushl(source.AsRegisterPairLow<Register>());
6861       __ cfi().AdjustCFAOffset(elem_size);
6862       // Load the destination register.
6863       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
6864       // And remove the temporary stack space we allocated.
6865       codegen_->DecreaseFrame(2 * elem_size);
6866     } else {
6867       DCHECK(destination.IsDoubleStackSlot());
6868       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
6869       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
6870               source.AsRegisterPairHigh<Register>());
6871     }
6872   } else if (source.IsFpuRegister()) {
6873     if (destination.IsRegister()) {
6874       __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
6875     } else if (destination.IsFpuRegister()) {
6876       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6877     } else if (destination.IsRegisterPair()) {
6878       size_t elem_size = DataType::Size(DataType::Type::kInt32);
6879       // Create stack space for 2 elements.
6880       codegen_->IncreaseFrame(2 * elem_size);
6881       // Store the source register.
6882       __ movsd(Address(ESP, 0), source.AsFpuRegister<XmmRegister>());
6883       // And pop the values into destination registers.
6884       __ popl(destination.AsRegisterPairLow<Register>());
6885       __ cfi().AdjustCFAOffset(-elem_size);
6886       __ popl(destination.AsRegisterPairHigh<Register>());
6887       __ cfi().AdjustCFAOffset(-elem_size);
6888     } else if (destination.IsStackSlot()) {
6889       __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6890     } else if (destination.IsDoubleStackSlot()) {
6891       __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6892     } else {
6893       DCHECK(destination.IsSIMDStackSlot());
6894       __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6895     }
6896   } else if (source.IsStackSlot()) {
6897     if (destination.IsRegister()) {
6898       __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
6899     } else if (destination.IsFpuRegister()) {
6900       __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6901     } else {
6902       DCHECK(destination.IsStackSlot());
6903       MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
6904     }
6905   } else if (source.IsDoubleStackSlot()) {
6906     if (destination.IsRegisterPair()) {
6907       __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
6908       __ movl(destination.AsRegisterPairHigh<Register>(),
6909               Address(ESP, source.GetHighStackIndex(kX86WordSize)));
6910     } else if (destination.IsFpuRegister()) {
6911       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6912     } else {
6913       DCHECK(destination.IsDoubleStackSlot()) << destination;
6914       MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
6915     }
6916   } else if (source.IsSIMDStackSlot()) {
6917     if (destination.IsFpuRegister()) {
6918       __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6919     } else {
6920       DCHECK(destination.IsSIMDStackSlot());
6921       MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
6922     }
6923   } else if (source.IsConstant()) {
6924     HConstant* constant = source.GetConstant();
6925     if (constant->IsIntConstant() || constant->IsNullConstant()) {
6926       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6927       if (destination.IsRegister()) {
6928         if (value == 0) {
6929           __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>());
6930         } else {
6931           __ movl(destination.AsRegister<Register>(), Immediate(value));
6932         }
6933       } else {
6934         DCHECK(destination.IsStackSlot()) << destination;
6935         __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
6936       }
6937     } else if (constant->IsFloatConstant()) {
6938       float fp_value = constant->AsFloatConstant()->GetValue();
6939       int32_t value = bit_cast<int32_t, float>(fp_value);
6940       Immediate imm(value);
6941       if (destination.IsFpuRegister()) {
6942         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6943         if (value == 0) {
6944           // Easy handling of 0.0.
6945           __ xorps(dest, dest);
6946         } else {
6947           ScratchRegisterScope ensure_scratch(
6948               this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6949           Register temp = static_cast<Register>(ensure_scratch.GetRegister());
6950           __ movl(temp, Immediate(value));
6951           __ movd(dest, temp);
6952         }
6953       } else {
6954         DCHECK(destination.IsStackSlot()) << destination;
6955         __ movl(Address(ESP, destination.GetStackIndex()), imm);
6956       }
6957     } else if (constant->IsLongConstant()) {
6958       int64_t value = constant->AsLongConstant()->GetValue();
6959       int32_t low_value = Low32Bits(value);
6960       int32_t high_value = High32Bits(value);
6961       Immediate low(low_value);
6962       Immediate high(high_value);
6963       if (destination.IsDoubleStackSlot()) {
6964         __ movl(Address(ESP, destination.GetStackIndex()), low);
6965         __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6966       } else {
6967         __ movl(destination.AsRegisterPairLow<Register>(), low);
6968         __ movl(destination.AsRegisterPairHigh<Register>(), high);
6969       }
6970     } else {
6971       DCHECK(constant->IsDoubleConstant());
6972       double dbl_value = constant->AsDoubleConstant()->GetValue();
6973       int64_t value = bit_cast<int64_t, double>(dbl_value);
6974       int32_t low_value = Low32Bits(value);
6975       int32_t high_value = High32Bits(value);
6976       Immediate low(low_value);
6977       Immediate high(high_value);
6978       if (destination.IsFpuRegister()) {
6979         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6980         if (value == 0) {
6981           // Easy handling of 0.0.
6982           __ xorpd(dest, dest);
6983         } else {
6984           __ pushl(high);
6985           __ cfi().AdjustCFAOffset(4);
6986           __ pushl(low);
6987           __ cfi().AdjustCFAOffset(4);
6988           __ movsd(dest, Address(ESP, 0));
6989           codegen_->DecreaseFrame(8);
6990         }
6991       } else {
6992         DCHECK(destination.IsDoubleStackSlot()) << destination;
6993         __ movl(Address(ESP, destination.GetStackIndex()), low);
6994         __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6995       }
6996     }
6997   } else {
6998     LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
6999   }
7000 }
7001 
7002 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
7003   Register suggested_scratch = reg == EAX ? EBX : EAX;
7004   ScratchRegisterScope ensure_scratch(
7005       this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
7006 
7007   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
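       // Exchange reg with the stack slot through the scratch register:
       // scratch <- [mem], [mem] <- reg, reg <- scratch.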
7008   __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
7009   __ movl(Address(ESP, mem + stack_offset), reg);
7010   __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
7011 }
7012 
7013 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
7014   ScratchRegisterScope ensure_scratch(
7015       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
7016 
7017   Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
7018   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
7019   __ movl(temp_reg, Address(ESP, mem + stack_offset));
7020   __ movss(Address(ESP, mem + stack_offset), reg);
7021   __ movd(reg, temp_reg);
7022 }
7023 
7024 void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
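       // Spill the 16-byte register to a temporary stack slot, swap that slot with the
       // target memory word by word, then reload the register with the memory's old contents.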
7025   size_t extra_slot = 4 * kX86WordSize;
7026   codegen_->IncreaseFrame(extra_slot);
7027   __ movups(Address(ESP, 0), XmmRegister(reg));
7028   ExchangeMemory(0, mem + extra_slot, 4);
7029   __ movups(XmmRegister(reg), Address(ESP, 0));
7030   codegen_->DecreaseFrame(extra_slot);
7031 }
7032 
7033 void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
7034   ScratchRegisterScope ensure_scratch1(
7035       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
7036 
7037   Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
7038   ScratchRegisterScope ensure_scratch2(
7039       this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
7040 
7041   int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
7042   stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
7043 
7044   // Now that temp registers are available (possibly spilled), exchange blocks of memory.
7045   for (int i = 0; i < number_of_words; i++) {
7046     __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
7047     __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
7048     __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
7049     __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
7050     stack_offset += kX86WordSize;
7051   }
7052 }
7053 
7054 void ParallelMoveResolverX86::EmitSwap(size_t index) {
7055   MoveOperands* move = moves_[index];
7056   Location source = move->GetSource();
7057   Location destination = move->GetDestination();
7058 
7059   if (source.IsRegister() && destination.IsRegister()) {
7060     // Use XOR swap algorithm to avoid serializing XCHG instruction or using a temporary.
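         // Three XORs leave each register holding the other's original value,
         // e.g. a=3, b=5: a^=b -> 6, b^=a -> 3, a^=b -> 5. The registers must be
         // distinct, since XOR-swapping a register with itself would clear it.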
7061     DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>());
7062     __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
7063     __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>());
7064     __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
7065   } else if (source.IsRegister() && destination.IsStackSlot()) {
7066     Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
7067   } else if (source.IsStackSlot() && destination.IsRegister()) {
7068     Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
7069   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
7070     ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
7071   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
7072     // Use XOR Swap algorithm to avoid a temporary.
7073     DCHECK_NE(source.reg(), destination.reg());
7074     __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
7075     __ xorpd(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
7076     __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
7077   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
7078     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
7079   } else if (destination.IsFpuRegister() && source.IsStackSlot()) {
7080     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
7081   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
7082     // Take advantage of the 16 bytes in the XMM register.
7083     XmmRegister reg = source.AsFpuRegister<XmmRegister>();
7084     Address stack(ESP, destination.GetStackIndex());
7085     // Load the double into the high doubleword.
7086     __ movhpd(reg, stack);
7087 
7088     // Store the low double into the destination.
7089     __ movsd(stack, reg);
7090 
7091     // Move the high double to the low double.
7092     __ psrldq(reg, Immediate(8));
7093   } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) {
7094     // Take advantage of the 16 bytes in the XMM register.
7095     XmmRegister reg = destination.AsFpuRegister<XmmRegister>();
7096     Address stack(ESP, source.GetStackIndex());
7097     // Load the double into the high doubleword.
7098     __ movhpd(reg, stack);
7099 
7100     // Store the low double into the destination.
7101     __ movsd(stack, reg);
7102 
7103     // Move the high double to the low double.
7104     __ psrldq(reg, Immediate(8));
7105   } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
7106     ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
7107   } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
7108     ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
7109   } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
7110     Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
7111   } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
7112     Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
7113   } else {
7114     LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
7115   }
7116 }
7117 
7118 void ParallelMoveResolverX86::SpillScratch(int reg) {
7119   __ pushl(static_cast<Register>(reg));
7120 }
7121 
7122 void ParallelMoveResolverX86::RestoreScratch(int reg) {
7123   __ popl(static_cast<Register>(reg));
7124 }
7125 
7126 HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
7127     HLoadClass::LoadKind desired_class_load_kind) {
7128   switch (desired_class_load_kind) {
7129     case HLoadClass::LoadKind::kInvalid:
7130       LOG(FATAL) << "UNREACHABLE";
7131       UNREACHABLE();
7132     case HLoadClass::LoadKind::kReferrersClass:
7133       break;
7134     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
7135     case HLoadClass::LoadKind::kBootImageRelRo:
7136     case HLoadClass::LoadKind::kBssEntry:
7137     case HLoadClass::LoadKind::kBssEntryPublic:
7138     case HLoadClass::LoadKind::kBssEntryPackage:
7139       DCHECK(!GetCompilerOptions().IsJitCompiler());
7140       break;
7141     case HLoadClass::LoadKind::kJitBootImageAddress:
7142     case HLoadClass::LoadKind::kJitTableAddress:
7143       DCHECK(GetCompilerOptions().IsJitCompiler());
7144       break;
7145     case HLoadClass::LoadKind::kRuntimeCall:
7146       break;
7147   }
7148   return desired_class_load_kind;
7149 }
7150 
7151 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
7152   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7153   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7154     InvokeRuntimeCallingConvention calling_convention;
7155     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
7156         cls,
7157         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
7158         Location::RegisterLocation(EAX));
7159     DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX);
7160     return;
7161   }
7162   DCHECK_EQ(cls->NeedsAccessCheck(),
7163             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7164                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7165 
7166   const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage();
7167   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
7168       ? LocationSummary::kCallOnSlowPath
7169       : LocationSummary::kNoCall;
7170   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
7171   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
7172     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7173   }
7174 
7175   if (load_kind == HLoadClass::LoadKind::kReferrersClass || cls->HasPcRelativeLoadKind()) {
7176     locations->SetInAt(0, Location::RequiresRegister());
7177   }
7178   locations->SetOut(Location::RequiresRegister());
7179   if (call_kind == LocationSummary::kCallOnSlowPath && cls->HasPcRelativeLoadKind()) {
7180     if (!gUseReadBarrier || kUseBakerReadBarrier) {
7181       // Rely on the type resolution and/or initialization to save everything.
7182       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7183     } else {
7184       // For non-Baker read barrier we have a temp-clobbering call.
7185     }
7186   }
7187 }
7188 
7189 Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
7190                                               dex::TypeIndex type_index,
7191                                               Handle<mirror::Class> handle) {
7192   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
7193   // Add a patch entry and return the label.
7194   jit_class_patches_.emplace_back(&dex_file, type_index.index_);
7195   PatchInfo<Label>* info = &jit_class_patches_.back();
7196   return &info->label;
7197 }
7198 
7199 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7200 // move.
7201 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
7202   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
7203   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
7204     codegen_->GenerateLoadClassRuntimeCall(cls);
7205     return;
7206   }
7207   DCHECK_EQ(cls->NeedsAccessCheck(),
7208             load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
7209                 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
7210 
7211   LocationSummary* locations = cls->GetLocations();
7212   Location out_loc = locations->Out();
7213   Register out = out_loc.AsRegister<Register>();
7214 
7215   bool generate_null_check = false;
7216   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
7217       ? kWithoutReadBarrier
7218       : gCompilerReadBarrierOption;
7219   switch (load_kind) {
7220     case HLoadClass::LoadKind::kReferrersClass: {
7221       DCHECK(!cls->CanCallRuntime());
7222       DCHECK(!cls->MustGenerateClinitCheck());
7223       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
7224       Register current_method = locations->InAt(0).AsRegister<Register>();
7225       GenerateGcRootFieldLoad(
7226           cls,
7227           out_loc,
7228           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
7229           /* fixup_label= */ nullptr,
7230           read_barrier_option);
7231       break;
7232     }
7233     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
7234       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7235              codegen_->GetCompilerOptions().IsBootImageExtension());
7236       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
7237       Register method_address = locations->InAt(0).AsRegister<Register>();
7238       __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7239       codegen_->RecordBootImageTypePatch(cls);
7240       break;
7241     }
7242     case HLoadClass::LoadKind::kBootImageRelRo: {
7243       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7244       Register method_address = locations->InAt(0).AsRegister<Register>();
7245       __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7246       codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7247                                           CodeGenerator::GetBootImageOffset(cls));
7248       break;
7249     }
7250     case HLoadClass::LoadKind::kBssEntry:
7251     case HLoadClass::LoadKind::kBssEntryPublic:
7252     case HLoadClass::LoadKind::kBssEntryPackage: {
7253       Register method_address = locations->InAt(0).AsRegister<Register>();
7254       Address address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7255       Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
7256       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
7257       // No need for memory fence, thanks to the x86 memory model.
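      // The .bss slot starts out null: a null value loaded here means the class is not resolved
      // yet, so the null check emitted below sends execution to the slow path that calls the
      // runtime.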
7258       generate_null_check = true;
7259       break;
7260     }
7261     case HLoadClass::LoadKind::kJitBootImageAddress: {
7262       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
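      // Boot image objects live in non-moving space, so the 32-bit address of the class can be
      // embedded directly as an immediate.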
7263       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
7264       DCHECK_NE(address, 0u);
7265       __ movl(out, Immediate(address));
7266       break;
7267     }
7268     case HLoadClass::LoadKind::kJitTableAddress: {
7269       Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7270       Label* fixup_label = codegen_->NewJitRootClassPatch(
7271           cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
7272       // /* GcRoot<mirror::Class> */ out = *address
7273       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
7274       break;
7275     }
7276     case HLoadClass::LoadKind::kRuntimeCall:
7277     case HLoadClass::LoadKind::kInvalid:
7278       LOG(FATAL) << "UNREACHABLE";
7279       UNREACHABLE();
7280   }
7281 
7282   if (generate_null_check || cls->MustGenerateClinitCheck()) {
7283     DCHECK(cls->CanCallRuntime());
7284     SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls);
7285     codegen_->AddSlowPath(slow_path);
7286 
7287     if (generate_null_check) {
7288       __ testl(out, out);
7289       __ j(kEqual, slow_path->GetEntryLabel());
7290     }
7291 
7292     if (cls->MustGenerateClinitCheck()) {
7293       GenerateClassInitializationCheck(slow_path, out);
7294     } else {
7295       __ Bind(slow_path->GetExitLabel());
7296     }
7297   }
7298 }
7299 
7300 void LocationsBuilderX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7301   InvokeRuntimeCallingConvention calling_convention;
7302   Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
7303   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
7304 }
7305 
7306 void InstructionCodeGeneratorX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
7307   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
7308 }
7309 
7310 void LocationsBuilderX86::VisitLoadMethodType(HLoadMethodType* load) {
7311   InvokeRuntimeCallingConvention calling_convention;
7312   Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
7313   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
7314 }
7315 
7316 void InstructionCodeGeneratorX86::VisitLoadMethodType(HLoadMethodType* load) {
7317   codegen_->GenerateLoadMethodTypeRuntimeCall(load);
7318 }
7319 
7320 void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
7321   LocationSummary* locations =
7322       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
7323   locations->SetInAt(0, Location::RequiresRegister());
7324   if (check->HasUses()) {
7325     locations->SetOut(Location::SameAsFirstInput());
7326   }
7327   // Rely on the type initialization to save everything we need.
7328   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7329 }
7330 
7331 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
7332   // We assume the class is not null.
7333   SlowPathCode* slow_path =
7334       new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check);
7335   codegen_->AddSlowPath(slow_path);
7336   GenerateClassInitializationCheck(slow_path,
7337                                    check->GetLocations()->InAt(0).AsRegister<Register>());
7338 }
7339 
7340 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
7341     SlowPathCode* slow_path, Register class_reg) {
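  // Compare the class status byte (the most significant byte of the 32-bit status_ field)
  // against the shifted visibly-initialized marker; an unsigned below result means the class
  // may still need initialization, so branch to the slow path.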
7342   __ cmpb(Address(class_reg,  status_byte_offset), Immediate(shifted_visibly_initialized_value));
7343   __ j(kBelow, slow_path->GetEntryLabel());
7344   __ Bind(slow_path->GetExitLabel());
7345 }
7346 
7347 void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
7348                                                                     Register temp) {
7349   uint32_t path_to_root = check->GetBitstringPathToRoot();
7350   uint32_t mask = check->GetBitstringMask();
7351   DCHECK(IsPowerOfTwo(mask + 1));
7352   size_t mask_bits = WhichPowerOf2(mask + 1);
7353 
7354   if (mask_bits == 16u) {
7355     // Compare the bitstring in memory.
7356     __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
7357   } else {
7358     // /* uint32_t */ temp = temp->status_
7359     __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
7360     // Compare the bitstring bits using SUB.
7361     __ subl(temp, Immediate(path_to_root));
7362     // Shift out bits that do not contribute to the comparison.
7363     __ shll(temp, Immediate(32u - mask_bits));
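    // After the shift the zero flag is set iff the masked bits equaled `path_to_root`, matching
    // the flags produced by the `cmpw` above; callers branch on (not) equal.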
7364   }
7365 }
7366 
7367 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
7368     HLoadString::LoadKind desired_string_load_kind) {
7369   switch (desired_string_load_kind) {
7370     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
7371     case HLoadString::LoadKind::kBootImageRelRo:
7372     case HLoadString::LoadKind::kBssEntry:
7373       DCHECK(!GetCompilerOptions().IsJitCompiler());
7374       break;
7375     case HLoadString::LoadKind::kJitBootImageAddress:
7376     case HLoadString::LoadKind::kJitTableAddress:
7377       DCHECK(GetCompilerOptions().IsJitCompiler());
7378       break;
7379     case HLoadString::LoadKind::kRuntimeCall:
7380       break;
7381   }
7382   return desired_string_load_kind;
7383 }
7384 
7385 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
7386   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
7387   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
7388   HLoadString::LoadKind load_kind = load->GetLoadKind();
7389   if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
7390       load_kind == HLoadString::LoadKind::kBootImageRelRo ||
7391       load_kind == HLoadString::LoadKind::kBssEntry) {
7392     locations->SetInAt(0, Location::RequiresRegister());
7393   }
7394   if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
7395     locations->SetOut(Location::RegisterLocation(EAX));
7396   } else {
7397     locations->SetOut(Location::RequiresRegister());
7398     if (load_kind == HLoadString::LoadKind::kBssEntry) {
7399       if (!gUseReadBarrier || kUseBakerReadBarrier) {
7400         // Rely on the pResolveString to save everything.
7401         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
7402       } else {
7403         // For non-Baker read barrier we have a temp-clobbering call.
7404       }
7405     }
7406   }
7407 }
7408 
7409 Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
7410                                                dex::StringIndex string_index,
7411                                                Handle<mirror::String> handle) {
7412   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
7413   // Add a patch entry and return the label.
7414   jit_string_patches_.emplace_back(&dex_file, string_index.index_);
7415   PatchInfo<Label>* info = &jit_string_patches_.back();
7416   return &info->label;
7417 }
7418 
7419 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
7420 // move.
7421 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
7422   LocationSummary* locations = load->GetLocations();
7423   Location out_loc = locations->Out();
7424   Register out = out_loc.AsRegister<Register>();
7425 
7426   switch (load->GetLoadKind()) {
7427     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7428       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7429              codegen_->GetCompilerOptions().IsBootImageExtension());
7430       Register method_address = locations->InAt(0).AsRegister<Register>();
7431       __ leal(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7432       codegen_->RecordBootImageStringPatch(load);
7433       return;
7434     }
7435     case HLoadString::LoadKind::kBootImageRelRo: {
7436       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7437       Register method_address = locations->InAt(0).AsRegister<Register>();
7438       __ movl(out, Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset));
7439       codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(),
7440                                           CodeGenerator::GetBootImageOffset(load));
7441       return;
7442     }
7443     case HLoadString::LoadKind::kBssEntry: {
7444       Register method_address = locations->InAt(0).AsRegister<Register>();
7445       Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
7446       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
7447       // /* GcRoot<mirror::String> */ out = *address  /* PC-relative */
7448       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
7449       // No need for memory fence, thanks to the x86 memory model.
7450       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
7451       codegen_->AddSlowPath(slow_path);
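      // The string's .bss slot is null until the string has been resolved; a null result here
      // diverts to the slow path that resolves it at runtime.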
7452       __ testl(out, out);
7453       __ j(kEqual, slow_path->GetEntryLabel());
7454       __ Bind(slow_path->GetExitLabel());
7455       return;
7456     }
7457     case HLoadString::LoadKind::kJitBootImageAddress: {
7458       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
7459       DCHECK_NE(address, 0u);
7460       __ movl(out, Immediate(address));
7461       return;
7462     }
7463     case HLoadString::LoadKind::kJitTableAddress: {
7464       Address address = Address::Absolute(CodeGeneratorX86::kPlaceholder32BitOffset);
7465       Label* fixup_label = codegen_->NewJitRootStringPatch(
7466           load->GetDexFile(), load->GetStringIndex(), load->GetString());
7467       // /* GcRoot<mirror::String> */ out = *address
7468       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
7469       return;
7470     }
7471     default:
7472       break;
7473   }
7474 
7475   // TODO: Re-add the compiler code to do string dex cache lookup again.
7476   InvokeRuntimeCallingConvention calling_convention;
7477   DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
7478   __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
7479   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
7480   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
7481 }
7482 
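// On x86 the current Thread is reached through the FS segment register, so thread-local fields
// such as the pending exception are addressed fs-relative.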
7483 static Address GetExceptionTlsAddress() {
7484   return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
7485 }
7486 
7487 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
7488   LocationSummary* locations =
7489       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7490   locations->SetOut(Location::RequiresRegister());
7491 }
7492 
7493 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) {
7494   __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress());
7495 }
7496 
7497 void LocationsBuilderX86::VisitClearException(HClearException* clear) {
7498   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7499 }
7500 
7501 void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
7502   __ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
7503 }
7504 
7505 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
7506   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7507       instruction, LocationSummary::kCallOnMainOnly);
7508   InvokeRuntimeCallingConvention calling_convention;
7509   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7510 }
7511 
7512 void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
7513   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
7514   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7515 }
7516 
7517 // Temp is used for read barrier.
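// Only the slow-path (non-Baker) read barrier needs it, and only for the checks that walk the
// class hierarchy, where the original reference must be kept alive for the barrier call.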
7518 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
7519   if (gUseReadBarrier &&
7520       !kUseBakerReadBarrier &&
7521       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7522        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7523        type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7524     return 1;
7525   }
7526   return 0;
7527 }
7528 
7529 // Interface case has 2 temps, one for holding the number of interfaces, one for the current
7530 // interface pointer; the current interface is compared in memory.
7531 // The other checks have one temp for loading the object's class.
7532 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
7533   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7534     return 2;
7535   }
7536   return 1 + NumberOfInstanceOfTemps(type_check_kind);
7537 }
7538 
7539 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
7540   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7541   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7542   bool baker_read_barrier_slow_path = false;
7543   switch (type_check_kind) {
7544     case TypeCheckKind::kExactCheck:
7545     case TypeCheckKind::kAbstractClassCheck:
7546     case TypeCheckKind::kClassHierarchyCheck:
7547     case TypeCheckKind::kArrayObjectCheck: {
7548       bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
7549       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7550       baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
7551       break;
7552     }
7553     case TypeCheckKind::kArrayCheck:
7554     case TypeCheckKind::kUnresolvedCheck:
7555     case TypeCheckKind::kInterfaceCheck:
7556       call_kind = LocationSummary::kCallOnSlowPath;
7557       break;
7558     case TypeCheckKind::kBitstringCheck:
7559       break;
7560   }
7561 
7562   LocationSummary* locations =
7563       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7564   if (baker_read_barrier_slow_path) {
7565     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7566   }
7567   locations->SetInAt(0, Location::RequiresRegister());
7568   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7569     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7570     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7571     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7572   } else {
7573     locations->SetInAt(1, Location::Any());
7574   }
7575   // Note that TypeCheckSlowPathX86 uses this "out" register too.
7576   locations->SetOut(Location::RequiresRegister());
7577   // When read barriers are enabled, we need a temporary register for some cases.
7578   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
7579 }
7580 
7581 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
7582   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7583   LocationSummary* locations = instruction->GetLocations();
7584   Location obj_loc = locations->InAt(0);
7585   Register obj = obj_loc.AsRegister<Register>();
7586   Location cls = locations->InAt(1);
7587   Location out_loc = locations->Out();
7588   Register out = out_loc.AsRegister<Register>();
7589   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
7590   DCHECK_LE(num_temps, 1u);
7591   Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
7592   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7593   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7594   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7595   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7596   SlowPathCode* slow_path = nullptr;
7597   NearLabel done, zero;
7598 
7599   // Return 0 if `obj` is null.
7600   // Avoid null check if we know obj is not null.
7601   if (instruction->MustDoNullCheck()) {
7602     __ testl(obj, obj);
7603     __ j(kEqual, &zero);
7604   }
7605 
7606   switch (type_check_kind) {
7607     case TypeCheckKind::kExactCheck: {
7608       ReadBarrierOption read_barrier_option =
7609           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7610       // /* HeapReference<Class> */ out = obj->klass_
7611       GenerateReferenceLoadTwoRegisters(instruction,
7612                                         out_loc,
7613                                         obj_loc,
7614                                         class_offset,
7615                                         read_barrier_option);
7616       if (cls.IsRegister()) {
7617         __ cmpl(out, cls.AsRegister<Register>());
7618       } else {
7619         DCHECK(cls.IsStackSlot()) << cls;
7620         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7621       }
7622 
7623       // Classes must be equal for the instanceof to succeed.
7624       __ j(kNotEqual, &zero);
7625       __ movl(out, Immediate(1));
7626       __ jmp(&done);
7627       break;
7628     }
7629 
7630     case TypeCheckKind::kAbstractClassCheck: {
7631       ReadBarrierOption read_barrier_option =
7632           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7633       // /* HeapReference<Class> */ out = obj->klass_
7634       GenerateReferenceLoadTwoRegisters(instruction,
7635                                         out_loc,
7636                                         obj_loc,
7637                                         class_offset,
7638                                         read_barrier_option);
7639       // If the class is abstract, we eagerly fetch the super class of the
7640       // object to avoid doing a comparison we know will fail.
7641       NearLabel loop;
7642       __ Bind(&loop);
7643       // /* HeapReference<Class> */ out = out->super_class_
7644       GenerateReferenceLoadOneRegister(instruction,
7645                                        out_loc,
7646                                        super_offset,
7647                                        maybe_temp_loc,
7648                                        read_barrier_option);
7649       __ testl(out, out);
7650       // If `out` is null, we use it for the result, and jump to `done`.
7651       __ j(kEqual, &done);
7652       if (cls.IsRegister()) {
7653         __ cmpl(out, cls.AsRegister<Register>());
7654       } else {
7655         DCHECK(cls.IsStackSlot()) << cls;
7656         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7657       }
7658       __ j(kNotEqual, &loop);
7659       __ movl(out, Immediate(1));
7660       if (zero.IsLinked()) {
7661         __ jmp(&done);
7662       }
7663       break;
7664     }
7665 
7666     case TypeCheckKind::kClassHierarchyCheck: {
7667       ReadBarrierOption read_barrier_option =
7668           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7669       // /* HeapReference<Class> */ out = obj->klass_
7670       GenerateReferenceLoadTwoRegisters(instruction,
7671                                         out_loc,
7672                                         obj_loc,
7673                                         class_offset,
7674                                         read_barrier_option);
7675       // Walk over the class hierarchy to find a match.
7676       NearLabel loop, success;
7677       __ Bind(&loop);
7678       if (cls.IsRegister()) {
7679         __ cmpl(out, cls.AsRegister<Register>());
7680       } else {
7681         DCHECK(cls.IsStackSlot()) << cls;
7682         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7683       }
7684       __ j(kEqual, &success);
7685       // /* HeapReference<Class> */ out = out->super_class_
7686       GenerateReferenceLoadOneRegister(instruction,
7687                                        out_loc,
7688                                        super_offset,
7689                                        maybe_temp_loc,
7690                                        read_barrier_option);
7691       __ testl(out, out);
7692       __ j(kNotEqual, &loop);
7693       // If `out` is null, we use it for the result, and jump to `done`.
7694       __ jmp(&done);
7695       __ Bind(&success);
7696       __ movl(out, Immediate(1));
7697       if (zero.IsLinked()) {
7698         __ jmp(&done);
7699       }
7700       break;
7701     }
7702 
7703     case TypeCheckKind::kArrayObjectCheck: {
7704       ReadBarrierOption read_barrier_option =
7705           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7706       // /* HeapReference<Class> */ out = obj->klass_
7707       GenerateReferenceLoadTwoRegisters(instruction,
7708                                         out_loc,
7709                                         obj_loc,
7710                                         class_offset,
7711                                         read_barrier_option);
7712       // Do an exact check.
7713       NearLabel exact_check;
7714       if (cls.IsRegister()) {
7715         __ cmpl(out, cls.AsRegister<Register>());
7716       } else {
7717         DCHECK(cls.IsStackSlot()) << cls;
7718         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7719       }
7720       __ j(kEqual, &exact_check);
7721       // Otherwise, we need to check that the object's class is a non-primitive array.
7722       // /* HeapReference<Class> */ out = out->component_type_
7723       GenerateReferenceLoadOneRegister(instruction,
7724                                        out_loc,
7725                                        component_offset,
7726                                        maybe_temp_loc,
7727                                        read_barrier_option);
7728       __ testl(out, out);
7729       // If `out` is null, we use it for the result, and jump to `done`.
7730       __ j(kEqual, &done);
7731       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
7732       __ j(kNotEqual, &zero);
7733       __ Bind(&exact_check);
7734       __ movl(out, Immediate(1));
7735       __ jmp(&done);
7736       break;
7737     }
7738 
7739     case TypeCheckKind::kArrayCheck: {
7740       // No read barrier since the slow path will retry upon failure.
7741       // /* HeapReference<Class> */ out = obj->klass_
7742       GenerateReferenceLoadTwoRegisters(instruction,
7743                                         out_loc,
7744                                         obj_loc,
7745                                         class_offset,
7746                                         kWithoutReadBarrier);
7747       if (cls.IsRegister()) {
7748         __ cmpl(out, cls.AsRegister<Register>());
7749       } else {
7750         DCHECK(cls.IsStackSlot()) << cls;
7751         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7752       }
7753       DCHECK(locations->OnlyCallsOnSlowPath());
7754       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7755           instruction, /* is_fatal= */ false);
7756       codegen_->AddSlowPath(slow_path);
7757       __ j(kNotEqual, slow_path->GetEntryLabel());
7758       __ movl(out, Immediate(1));
7759       if (zero.IsLinked()) {
7760         __ jmp(&done);
7761       }
7762       break;
7763     }
7764 
7765     case TypeCheckKind::kUnresolvedCheck:
7766     case TypeCheckKind::kInterfaceCheck: {
7767       // Note that we indeed only call on slow path, but we always go
7768       // into the slow path for the unresolved and interface check
7769       // cases.
7770       //
7771       // We cannot directly call the InstanceofNonTrivial runtime
7772       // entry point without resorting to a type checking slow path
7773       // here (i.e. by calling InvokeRuntime directly), as it would
7774       // require to assign fixed registers for the inputs of this
7775       // HInstanceOf instruction (following the runtime calling
7776       // convention), which might be cluttered by the potential first
7777       // read barrier emission at the beginning of this method.
7778       //
7779       // TODO: Introduce a new runtime entry point taking the object
7780       // to test (instead of its class) as argument, and let it deal
7781       // with the read barrier issues. This will let us refactor this
7782       // case of the `switch` code as it was previously (with a direct
7783       // call to the runtime not using a type checking slow path).
7784       // This should also be beneficial for the other cases above.
7785       DCHECK(locations->OnlyCallsOnSlowPath());
7786       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7787           instruction, /* is_fatal= */ false);
7788       codegen_->AddSlowPath(slow_path);
7789       __ jmp(slow_path->GetEntryLabel());
7790       if (zero.IsLinked()) {
7791         __ jmp(&done);
7792       }
7793       break;
7794     }
7795 
7796     case TypeCheckKind::kBitstringCheck: {
7797       // /* HeapReference<Class> */ temp = obj->klass_
7798       GenerateReferenceLoadTwoRegisters(instruction,
7799                                         out_loc,
7800                                         obj_loc,
7801                                         class_offset,
7802                                         kWithoutReadBarrier);
7803 
7804       GenerateBitstringTypeCheckCompare(instruction, out);
7805       __ j(kNotEqual, &zero);
7806       __ movl(out, Immediate(1));
7807       __ jmp(&done);
7808       break;
7809     }
7810   }
7811 
7812   if (zero.IsLinked()) {
7813     __ Bind(&zero);
7814     __ xorl(out, out);
7815   }
7816 
7817   if (done.IsLinked()) {
7818     __ Bind(&done);
7819   }
7820 
7821   if (slow_path != nullptr) {
7822     __ Bind(slow_path->GetExitLabel());
7823   }
7824 }
7825 
7826 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
7827   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7828   LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
7829   LocationSummary* locations =
7830       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7831   locations->SetInAt(0, Location::RequiresRegister());
7832   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7833     // Require a register for the interface check since there is a loop that compares the class to
7834     // a memory address.
7835     locations->SetInAt(1, Location::RequiresRegister());
7836   } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7837     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7838     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7839     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7840   } else {
7841     locations->SetInAt(1, Location::Any());
7842   }
7843   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86.
7844   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
7845 }
7846 
7847 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
7848   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7849   LocationSummary* locations = instruction->GetLocations();
7850   Location obj_loc = locations->InAt(0);
7851   Register obj = obj_loc.AsRegister<Register>();
7852   Location cls = locations->InAt(1);
7853   Location temp_loc = locations->GetTemp(0);
7854   Register temp = temp_loc.AsRegister<Register>();
7855   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
7856   DCHECK_GE(num_temps, 1u);
7857   DCHECK_LE(num_temps, 2u);
7858   Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
7859   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7860   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7861   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7862   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7863   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7864   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7865   const uint32_t object_array_data_offset =
7866       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7867 
7868   bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
7869   SlowPathCode* type_check_slow_path =
7870       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7871           instruction, is_type_check_slow_path_fatal);
7872   codegen_->AddSlowPath(type_check_slow_path);
7873 
7874   NearLabel done;
7875   // Avoid null check if we know obj is not null.
7876   if (instruction->MustDoNullCheck()) {
7877     __ testl(obj, obj);
7878     __ j(kEqual, &done);
7879   }
7880 
7881   switch (type_check_kind) {
7882     case TypeCheckKind::kExactCheck:
7883     case TypeCheckKind::kArrayCheck: {
7884       // /* HeapReference<Class> */ temp = obj->klass_
7885       GenerateReferenceLoadTwoRegisters(instruction,
7886                                         temp_loc,
7887                                         obj_loc,
7888                                         class_offset,
7889                                         kWithoutReadBarrier);
7890 
7891       if (cls.IsRegister()) {
7892         __ cmpl(temp, cls.AsRegister<Register>());
7893       } else {
7894         DCHECK(cls.IsStackSlot()) << cls;
7895         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7896       }
7897       // Jump to slow path for throwing the exception or doing a
7898       // more involved array check.
7899       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7900       break;
7901     }
7902 
7903     case TypeCheckKind::kAbstractClassCheck: {
7904       // /* HeapReference<Class> */ temp = obj->klass_
7905       GenerateReferenceLoadTwoRegisters(instruction,
7906                                         temp_loc,
7907                                         obj_loc,
7908                                         class_offset,
7909                                         kWithoutReadBarrier);
7910 
7911       // If the class is abstract, we eagerly fetch the super class of the
7912       // object to avoid doing a comparison we know will fail.
7913       NearLabel loop;
7914       __ Bind(&loop);
7915       // /* HeapReference<Class> */ temp = temp->super_class_
7916       GenerateReferenceLoadOneRegister(instruction,
7917                                        temp_loc,
7918                                        super_offset,
7919                                        maybe_temp2_loc,
7920                                        kWithoutReadBarrier);
7921 
7922       // If the class reference currently in `temp` is null, jump to the slow path to throw the
7923       // exception.
7924       __ testl(temp, temp);
7925       __ j(kZero, type_check_slow_path->GetEntryLabel());
7926 
7927       // Otherwise, compare the classes.
7928       if (cls.IsRegister()) {
7929         __ cmpl(temp, cls.AsRegister<Register>());
7930       } else {
7931         DCHECK(cls.IsStackSlot()) << cls;
7932         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7933       }
7934       __ j(kNotEqual, &loop);
7935       break;
7936     }
7937 
7938     case TypeCheckKind::kClassHierarchyCheck: {
7939       // /* HeapReference<Class> */ temp = obj->klass_
7940       GenerateReferenceLoadTwoRegisters(instruction,
7941                                         temp_loc,
7942                                         obj_loc,
7943                                         class_offset,
7944                                         kWithoutReadBarrier);
7945 
7946       // Walk over the class hierarchy to find a match.
7947       NearLabel loop;
7948       __ Bind(&loop);
7949       if (cls.IsRegister()) {
7950         __ cmpl(temp, cls.AsRegister<Register>());
7951       } else {
7952         DCHECK(cls.IsStackSlot()) << cls;
7953         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7954       }
7955       __ j(kEqual, &done);
7956 
7957       // /* HeapReference<Class> */ temp = temp->super_class_
7958       GenerateReferenceLoadOneRegister(instruction,
7959                                        temp_loc,
7960                                        super_offset,
7961                                        maybe_temp2_loc,
7962                                        kWithoutReadBarrier);
7963 
7964       // If the class reference currently in `temp` is not null, jump
7965       // back to the beginning of the loop.
7966       __ testl(temp, temp);
7967       __ j(kNotZero, &loop);
7968       // Otherwise, jump to the slow path to throw the exception.
7969       __ jmp(type_check_slow_path->GetEntryLabel());
7970       break;
7971     }
7972 
7973     case TypeCheckKind::kArrayObjectCheck: {
7974       // /* HeapReference<Class> */ temp = obj->klass_
7975       GenerateReferenceLoadTwoRegisters(instruction,
7976                                         temp_loc,
7977                                         obj_loc,
7978                                         class_offset,
7979                                         kWithoutReadBarrier);
7980 
7981       // Do an exact check.
7982       if (cls.IsRegister()) {
7983         __ cmpl(temp, cls.AsRegister<Register>());
7984       } else {
7985         DCHECK(cls.IsStackSlot()) << cls;
7986         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7987       }
7988       __ j(kEqual, &done);
7989 
7990       // Otherwise, we need to check that the object's class is a non-primitive array.
7991       // /* HeapReference<Class> */ temp = temp->component_type_
7992       GenerateReferenceLoadOneRegister(instruction,
7993                                        temp_loc,
7994                                        component_offset,
7995                                        maybe_temp2_loc,
7996                                        kWithoutReadBarrier);
7997 
7998       // If the component type is null (i.e. the object is not an array), jump to the slow path to
7999       // throw the exception. Otherwise proceed with the check.
8000       __ testl(temp, temp);
8001       __ j(kZero, type_check_slow_path->GetEntryLabel());
8002 
8003       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
8004       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
8005       break;
8006     }
8007 
8008     case TypeCheckKind::kUnresolvedCheck:
8009       // We always go into the type check slow path for the unresolved check case.
8010       // We cannot directly call the CheckCast runtime entry point
8011       // without resorting to a type checking slow path here (i.e. by
8012       // calling InvokeRuntime directly), as it would require to
8013       // assign fixed registers for the inputs of this HInstanceOf
8014       // instruction (following the runtime calling convention), which
8015       // might be cluttered by the potential first read barrier
8016       // emission at the beginning of this method.
8017       __ jmp(type_check_slow_path->GetEntryLabel());
8018       break;
8019 
8020     case TypeCheckKind::kInterfaceCheck: {
8021       // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
8022       // We cannot get false positives by doing this.
8023       // /* HeapReference<Class> */ temp = obj->klass_
8024       GenerateReferenceLoadTwoRegisters(instruction,
8025                                         temp_loc,
8026                                         obj_loc,
8027                                         class_offset,
8028                                         kWithoutReadBarrier);
8029 
8030       // /* HeapReference<Class> */ temp = temp->iftable_
8031       GenerateReferenceLoadTwoRegisters(instruction,
8032                                         temp_loc,
8033                                         temp_loc,
8034                                         iftable_offset,
8035                                         kWithoutReadBarrier);
8036       // Iftable is never null.
8037       __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
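      // Each iftable entry is an (interface class, method array) pair stored in a flat object
      // array, which is why the loop below steps the index by 2 references at a time.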
8038       // Maybe poison the `cls` for direct comparison with memory.
8039       __ MaybePoisonHeapReference(cls.AsRegister<Register>());
8040       // Loop through the iftable and check if any class matches.
8041       NearLabel start_loop;
8042       __ Bind(&start_loop);
8043       // Need to subtract first to handle the empty array case.
8044       __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
8045       __ j(kNegative, type_check_slow_path->GetEntryLabel());
8046       // Go to next interface if the classes do not match.
8047       __ cmpl(cls.AsRegister<Register>(),
8048               CodeGeneratorX86::ArrayAddress(temp,
8049                                              maybe_temp2_loc,
8050                                              TIMES_4,
8051                                              object_array_data_offset));
8052       __ j(kNotEqual, &start_loop);
8053       // If `cls` was poisoned above, unpoison it.
8054       __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
8055       break;
8056     }
8057 
8058     case TypeCheckKind::kBitstringCheck: {
8059       // /* HeapReference<Class> */ temp = obj->klass_
8060       GenerateReferenceLoadTwoRegisters(instruction,
8061                                         temp_loc,
8062                                         obj_loc,
8063                                         class_offset,
8064                                         kWithoutReadBarrier);
8065 
8066       GenerateBitstringTypeCheckCompare(instruction, temp);
8067       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
8068       break;
8069     }
8070   }
8071   __ Bind(&done);
8072 
8073   __ Bind(type_check_slow_path->GetExitLabel());
8074 }
8075 
8076 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
8077   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
8078       instruction, LocationSummary::kCallOnMainOnly);
8079   InvokeRuntimeCallingConvention calling_convention;
8080   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
8081 }
8082 
8083 void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) {
8084   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject
8085                                                  : kQuickUnlockObject,
8086                           instruction,
8087                           instruction->GetDexPc());
8088   if (instruction->IsEnter()) {
8089     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
8090   } else {
8091     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
8092   }
8093 }
8094 
8095 void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) {
8096   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
8097   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
8098   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
8099   locations->SetInAt(0, Location::RequiresRegister());
8100   locations->SetInAt(1, Location::RequiresRegister());
8101   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8102 }
8103 
8104 void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) {
8105   LocationSummary* locations = instruction->GetLocations();
8106   Location first = locations->InAt(0);
8107   Location second = locations->InAt(1);
8108   Location dest = locations->Out();
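  // ANDN computes dst = ~src1 & src2 in a single instruction; for a 64-bit value it is applied
  // to the low and high halves of the register pairs separately.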
8109   if (instruction->GetResultType() == DataType::Type::kInt32) {
8110     __ andn(dest.AsRegister<Register>(),
8111             first.AsRegister<Register>(),
8112             second.AsRegister<Register>());
8113   } else {
8114     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8115     __ andn(dest.AsRegisterPairLow<Register>(),
8116             first.AsRegisterPairLow<Register>(),
8117             second.AsRegisterPairLow<Register>());
8118     __ andn(dest.AsRegisterPairHigh<Register>(),
8119             first.AsRegisterPairHigh<Register>(),
8120             second.AsRegisterPairHigh<Register>());
8121   }
8122 }
8123 
8124 void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
8125   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
8126   DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType();
8127   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
8128   locations->SetInAt(0, Location::RequiresRegister());
8129   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
8130 }
8131 
8132 void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit(
8133     HX86MaskOrResetLeastSetBit* instruction) {
8134   LocationSummary* locations = instruction->GetLocations();
8135   Location src = locations->InAt(0);
8136   Location dest = locations->Out();
8137   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
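  // BLSR clears the lowest set bit (x & (x - 1)); BLSMSK produces a mask up to and including the
  // lowest set bit (x ^ (x - 1)). The op kind records which of the two forms was matched.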
8138   switch (instruction->GetOpKind()) {
8139     case HInstruction::kAnd:
8140       __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>());
8141       break;
8142     case HInstruction::kXor:
8143       __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>());
8144       break;
8145     default:
8146       LOG(FATAL) << "Unreachable";
8147   }
8148 }
8149 
8150 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
8151 void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
8152 void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
8153 
8154 void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
8155   LocationSummary* locations =
8156       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
8157   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
8158          || instruction->GetResultType() == DataType::Type::kInt64);
8159   locations->SetInAt(0, Location::RequiresRegister());
8160   locations->SetInAt(1, Location::Any());
8161   locations->SetOut(Location::SameAsFirstInput());
8162 }
8163 
8164 void InstructionCodeGeneratorX86::VisitAnd(HAnd* instruction) {
8165   HandleBitwiseOperation(instruction);
8166 }
8167 
8168 void InstructionCodeGeneratorX86::VisitOr(HOr* instruction) {
8169   HandleBitwiseOperation(instruction);
8170 }
8171 
8172 void InstructionCodeGeneratorX86::VisitXor(HXor* instruction) {
8173   HandleBitwiseOperation(instruction);
8174 }
8175 
8176 void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
8177   LocationSummary* locations = instruction->GetLocations();
8178   Location first = locations->InAt(0);
8179   Location second = locations->InAt(1);
8180   DCHECK(first.Equals(locations->Out()));
8181 
8182   if (instruction->GetResultType() == DataType::Type::kInt32) {
8183     if (second.IsRegister()) {
8184       if (instruction->IsAnd()) {
8185         __ andl(first.AsRegister<Register>(), second.AsRegister<Register>());
8186       } else if (instruction->IsOr()) {
8187         __ orl(first.AsRegister<Register>(), second.AsRegister<Register>());
8188       } else {
8189         DCHECK(instruction->IsXor());
8190         __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>());
8191       }
8192     } else if (second.IsConstant()) {
8193       if (instruction->IsAnd()) {
8194         __ andl(first.AsRegister<Register>(),
8195                 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8196       } else if (instruction->IsOr()) {
8197         __ orl(first.AsRegister<Register>(),
8198                Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8199       } else {
8200         DCHECK(instruction->IsXor());
8201         __ xorl(first.AsRegister<Register>(),
8202                 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
8203       }
8204     } else {
8205       if (instruction->IsAnd()) {
8206         __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8207       } else if (instruction->IsOr()) {
8208         __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8209       } else {
8210         DCHECK(instruction->IsXor());
8211         __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
8212       }
8213     }
8214   } else {
8215     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
8216     if (second.IsRegisterPair()) {
8217       if (instruction->IsAnd()) {
8218         __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8219         __ andl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8220       } else if (instruction->IsOr()) {
8221         __ orl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8222         __ orl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8223       } else {
8224         DCHECK(instruction->IsXor());
8225         __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
8226         __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
8227       }
8228     } else if (second.IsDoubleStackSlot()) {
8229       if (instruction->IsAnd()) {
8230         __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8231         __ andl(first.AsRegisterPairHigh<Register>(),
8232                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8233       } else if (instruction->IsOr()) {
8234         __ orl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8235         __ orl(first.AsRegisterPairHigh<Register>(),
8236                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8237       } else {
8238         DCHECK(instruction->IsXor());
8239         __ xorl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
8240         __ xorl(first.AsRegisterPairHigh<Register>(),
8241                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
8242       }
8243     } else {
8244       DCHECK(second.IsConstant()) << second;
8245       int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
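      // Handle each 32-bit half separately and exploit simple identities: AND with 0 becomes a
      // zeroing XOR, while AND with -1 and OR/XOR with 0 need no code at all.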
8246       int32_t low_value = Low32Bits(value);
8247       int32_t high_value = High32Bits(value);
8248       Immediate low(low_value);
8249       Immediate high(high_value);
8250       Register first_low = first.AsRegisterPairLow<Register>();
8251       Register first_high = first.AsRegisterPairHigh<Register>();
8252       if (instruction->IsAnd()) {
8253         if (low_value == 0) {
8254           __ xorl(first_low, first_low);
8255         } else if (low_value != -1) {
8256           __ andl(first_low, low);
8257         }
8258         if (high_value == 0) {
8259           __ xorl(first_high, first_high);
8260         } else if (high_value != -1) {
8261           __ andl(first_high, high);
8262         }
8263       } else if (instruction->IsOr()) {
8264         if (low_value != 0) {
8265           __ orl(first_low, low);
8266         }
8267         if (high_value != 0) {
8268           __ orl(first_high, high);
8269         }
8270       } else {
8271         DCHECK(instruction->IsXor());
8272         if (low_value != 0) {
8273           __ xorl(first_low, low);
8274         }
8275         if (high_value != 0) {
8276           __ xorl(first_high, high);
8277         }
8278       }
8279     }
8280   }
8281 }
8282 
8283 void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
8284     HInstruction* instruction,
8285     Location out,
8286     uint32_t offset,
8287     Location maybe_temp,
8288     ReadBarrierOption read_barrier_option) {
8289   Register out_reg = out.AsRegister<Register>();
8290   if (read_barrier_option == kWithReadBarrier) {
8291     CHECK(gUseReadBarrier);
8292     if (kUseBakerReadBarrier) {
8293       // Load with fast path based Baker's read barrier.
8294       // /* HeapReference<Object> */ out = *(out + offset)
8295       codegen_->GenerateFieldLoadWithBakerReadBarrier(
8296           instruction, out, out_reg, offset, /* needs_null_check= */ false);
8297     } else {
8298       // Load with slow path based read barrier.
8299       // Save the value of `out` into `maybe_temp` before overwriting it
8300       // in the following move operation, as we will need it for the
8301       // read barrier below.
8302       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
8303       __ movl(maybe_temp.AsRegister<Register>(), out_reg);
8304       // /* HeapReference<Object> */ out = *(out + offset)
8305       __ movl(out_reg, Address(out_reg, offset));
8306       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
8307     }
8308   } else {
8309     // Plain load with no read barrier.
8310     // /* HeapReference<Object> */ out = *(out + offset)
8311     __ movl(out_reg, Address(out_reg, offset));
8312     __ MaybeUnpoisonHeapReference(out_reg);
8313   }
8314 }
8315 
8316 void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
8317     HInstruction* instruction,
8318     Location out,
8319     Location obj,
8320     uint32_t offset,
8321     ReadBarrierOption read_barrier_option) {
8322   Register out_reg = out.AsRegister<Register>();
8323   Register obj_reg = obj.AsRegister<Register>();
8324   if (read_barrier_option == kWithReadBarrier) {
8325     CHECK(gUseReadBarrier);
8326     if (kUseBakerReadBarrier) {
8327       // Load with fast path based Baker's read barrier.
8328       // /* HeapReference<Object> */ out = *(obj + offset)
8329       codegen_->GenerateFieldLoadWithBakerReadBarrier(
8330           instruction, out, obj_reg, offset, /* needs_null_check= */ false);
8331     } else {
8332       // Load with slow path based read barrier.
8333       // /* HeapReference<Object> */ out = *(obj + offset)
8334       __ movl(out_reg, Address(obj_reg, offset));
8335       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
8336     }
8337   } else {
8338     // Plain load with no read barrier.
8339     // /* HeapReference<Object> */ out = *(obj + offset)
8340     __ movl(out_reg, Address(obj_reg, offset));
8341     __ MaybeUnpoisonHeapReference(out_reg);
8342   }
8343 }
8344 
8345 void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
8346     HInstruction* instruction,
8347     Location root,
8348     const Address& address,
8349     Label* fixup_label,
8350     ReadBarrierOption read_barrier_option) {
8351   Register root_reg = root.AsRegister<Register>();
8352   if (read_barrier_option == kWithReadBarrier) {
8353     DCHECK(gUseReadBarrier);
8354     if (kUseBakerReadBarrier) {
8355       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
8356       // Baker's read barriers are used:
8357       //
8358       //   root = obj.field;
8359       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
8360       //   if (temp != null) {
8361       //     root = temp(root)
8362       //   }
8363 
8364       // /* GcRoot<mirror::Object> */ root = *address
8365       __ movl(root_reg, address);
8366       if (fixup_label != nullptr) {
8367         __ Bind(fixup_label);
8368       }
8369       static_assert(
8370           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
8371           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
8372           "have different sizes.");
8373       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
8374                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
8375                     "have different sizes.");
8376 
8377       // Slow path marking the GC root `root`.
8378       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
8379           instruction, root, /* unpoison_ref_before_marking= */ false);
8380       codegen_->AddSlowPath(slow_path);
8381 
8382       // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
8383       const int32_t entry_point_offset =
8384           Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
8385       __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
8386       // The entrypoint is null when the GC is not marking.
8387       __ j(kNotEqual, slow_path->GetEntryLabel());
8388       __ Bind(slow_path->GetExitLabel());
8389     } else {
8390       // GC root loaded through a slow path for read barriers other
8391       // than Baker's.
8392       // /* GcRoot<mirror::Object>* */ root = address
8393       __ leal(root_reg, address);
8394       if (fixup_label != nullptr) {
8395         __ Bind(fixup_label);
8396       }
8397       // /* mirror::Object* */ root = root->Read()
8398       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
8399     }
8400   } else {
8401     // Plain GC root load with no read barrier.
8402     // /* GcRoot<mirror::Object> */ root = *address
8403     __ movl(root_reg, address);
8404     if (fixup_label != nullptr) {
8405       __ Bind(fixup_label);
8406     }
8407     // Note that GC roots are not affected by heap poisoning, thus we
8408     // do not have to unpoison `root_reg` here.
8409   }
8410 }
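// Illustrative sketch (not the verbatim output) of the Baker fast path emitted
// above for a GC root load, assuming the root is allocated to EAX so the
// per-register mark entrypoint is pReadBarrierMarkReg00:
//
//   movl eax, [address]                  // root = *address
//   cmpl fs:[pReadBarrierMarkReg00], 0   // entrypoint installed, i.e. GC marking?
//   jne  <ReadBarrierMarkSlowPathX86 entry>
//  <slow path exit>:
//
// The slow path calls the mark entrypoint and leaves the (possibly forwarded)
// reference back in EAX before returning to the exit label.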
8411 
8412 void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
8413                                                              Location ref,
8414                                                              Register obj,
8415                                                              uint32_t offset,
8416                                                              bool needs_null_check) {
8417   DCHECK(gUseReadBarrier);
8418   DCHECK(kUseBakerReadBarrier);
8419 
8420   // /* HeapReference<Object> */ ref = *(obj + offset)
8421   Address src(obj, offset);
8422   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8423 }
8424 
8425 void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
8426                                                              Location ref,
8427                                                              Register obj,
8428                                                              uint32_t data_offset,
8429                                                              Location index,
8430                                                              bool needs_null_check) {
8431   DCHECK(gUseReadBarrier);
8432   DCHECK(kUseBakerReadBarrier);
8433 
8434   static_assert(
8435       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
8436       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
8437   // /* HeapReference<Object> */ ref =
8438   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
8439   Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset);
8440   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8441 }
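// The two helpers above differ only in the source address they build before
// delegating to GenerateReferenceLoadWithBakerReadBarrier():
//   field load:  Address(obj, offset)
//   array load:  Address(obj, index_reg, TIMES_4, data_offset) for a register
//                index, or Address(obj, (const_index << 2) + data_offset) for
//                a constant index (see CodeGeneratorX86::ArrayAddress() below).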
8442 
8443 void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
8444                                                                  Location ref,
8445                                                                  Register obj,
8446                                                                  const Address& src,
8447                                                                  bool needs_null_check,
8448                                                                  bool always_update_field,
8449                                                                  Register* temp) {
8450   DCHECK(gUseReadBarrier);
8451   DCHECK(kUseBakerReadBarrier);
8452 
8453   // In slow path based read barriers, the read barrier call is
8454   // inserted after the original load. However, in fast path based
8455   // Baker's read barriers, we need to perform the load of
8456   // mirror::Object::monitor_ *before* the original reference load.
8457   // This load-load ordering is required by the read barrier.
8458   // The fast path/slow path (for Baker's algorithm) should look like:
8459   //
8460   //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
8461   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
8462   //   HeapReference<Object> ref = *src;  // Original reference load.
8463   //   bool is_gray = (rb_state == ReadBarrier::GrayState());
8464   //   if (is_gray) {
8465   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
8466   //   }
8467   //
8468   // Note: the original implementation in ReadBarrier::Barrier is
8469   // slightly more complex as:
8470   // - it implements the load-load fence using a data dependency on
8471   //   the high-bits of rb_state, which are expected to be all zeroes
8472   //   (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
8473   //   which is a no-op thanks to the x86 memory model);
8474   // - it performs additional checks that we do not do here for
8475   //   performance reasons.
8476 
8477   Register ref_reg = ref.AsRegister<Register>();
8478   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
8479 
8480   // Given the numeric representation, it's enough to check the low bit of the rb_state.
8481   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
8482   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
8483   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
8484   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
8485   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
8486 
8487   // if (rb_state == ReadBarrier::GrayState())
8488   //   ref = ReadBarrier::Mark(ref);
8489   // At this point, just do the "if" and make sure that flags are preserved until the branch.
8490   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
8491   if (needs_null_check) {
8492     MaybeRecordImplicitNullCheck(instruction);
8493   }
8494 
8495   // Load fence to prevent load-load reordering.
8496   // Note that this is a no-op, thanks to the x86 memory model.
8497   GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
8498 
8499   // The actual reference load.
8500   // /* HeapReference<Object> */ ref = *src
8501   __ movl(ref_reg, src);  // Flags are unaffected.
8502 
8503   // Note: reference unpoisoning modifies the flags, so it must be delayed until after the branch.
8504   // Slow path marking the object `ref` when it is gray.
8505   SlowPathCode* slow_path;
8506   if (always_update_field) {
8507     DCHECK(temp != nullptr);
8508     slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
8509         instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp);
8510   } else {
8511     slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
8512         instruction, ref, /* unpoison_ref_before_marking= */ true);
8513   }
8514   AddSlowPath(slow_path);
8515 
8516   // We have done the "if" of the gray bit check above; now branch based on the flags.
8517   __ j(kNotZero, slow_path->GetEntryLabel());
8518 
8519   // Object* ref = ref_addr->AsMirrorPtr()
8520   __ MaybeUnpoisonHeapReference(ref_reg);
8521 
8522   __ Bind(slow_path->GetExitLabel());
8523 }
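// Illustrative sketch of the sequence emitted above, assuming `ref_reg` is EAX
// and heap poisoning is disabled:
//
//   testb [obj + monitor_offset + gray_byte_position], test_value  // read the gray bit
//   movl  eax, [src]                          // original reference load, flags untouched
//   jnz   <ReadBarrierMarkSlowPathX86 entry>  // gray -> mark via the runtime entrypoint
//  <slow path exit>:
//
// With heap poisoning enabled, MaybeUnpoisonHeapReference(eax) additionally
// unpoisons the reference between the branch and the exit label on the fast
// path, while the slow path unpoisons before marking
// (unpoison_ref_before_marking == true).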
8524 
8525 void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
8526                                                Location out,
8527                                                Location ref,
8528                                                Location obj,
8529                                                uint32_t offset,
8530                                                Location index) {
8531   DCHECK(gUseReadBarrier);
8532 
8533   // Insert a slow path based read barrier *after* the reference load.
8534   //
8535   // If heap poisoning is enabled, the unpoisoning of the loaded
8536   // reference will be carried out by the runtime within the slow
8537   // path.
8538   //
8539   // Note that `ref` currently does not get unpoisoned (when heap
8540   // poisoning is enabled), which is alright as the `ref` argument is
8541   // not used by the artReadBarrierSlow entry point.
8542   //
8543   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
8544   SlowPathCode* slow_path = new (GetScopedAllocator())
8545       ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
8546   AddSlowPath(slow_path);
8547 
8548   __ jmp(slow_path->GetEntryLabel());
8549   __ Bind(slow_path->GetExitLabel());
8550 }
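// Note that, unlike the Baker fast path above, this barrier is unconditional:
// the generated code always jumps into the slow path and comes back through
// the exit label, so a runtime call is taken on every reference load compiled
// with this (non-Baker) read barrier configuration.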
8551 
8552 void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
8553                                                     Location out,
8554                                                     Location ref,
8555                                                     Location obj,
8556                                                     uint32_t offset,
8557                                                     Location index) {
8558   if (gUseReadBarrier) {
8559     // Baker's read barriers shall be handled by the fast path
8560     // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
8561     DCHECK(!kUseBakerReadBarrier);
8562     // If heap poisoning is enabled, unpoisoning will be taken care of
8563     // by the runtime within the slow path.
8564     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
8565   } else if (kPoisonHeapReferences) {
8566     __ UnpoisonHeapReference(out.AsRegister<Register>());
8567   }
8568 }
8569 
8570 void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
8571                                                       Location out,
8572                                                       Location root) {
8573   DCHECK(gUseReadBarrier);
8574 
8575   // Insert a slow path based read barrier *after* the GC root load.
8576   //
8577   // Note that GC roots are not affected by heap poisoning, so we do
8578   // not need to do anything special for this here.
8579   SlowPathCode* slow_path =
8580       new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86(instruction, out, root);
8581   AddSlowPath(slow_path);
8582 
8583   __ jmp(slow_path->GetEntryLabel());
8584   __ Bind(slow_path->GetExitLabel());
8585 }
8586 
8587 void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
8588   // Nothing to do, this should be removed during prepare for register allocator.
8589   LOG(FATAL) << "Unreachable";
8590 }
8591 
8592 void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
8593   // Nothing to do, this should be removed during prepare for register allocator.
8594   LOG(FATAL) << "Unreachable";
8595 }
8596 
8597 // Simple implementation of packed switch - generate cascaded compare/jumps.
8598 void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8599   LocationSummary* locations =
8600       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8601   locations->SetInAt(0, Location::RequiresRegister());
8602 }
8603 
8604 void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
8605                                                               int32_t lower_bound,
8606                                                               uint32_t num_entries,
8607                                                               HBasicBlock* switch_block,
8608                                                               HBasicBlock* default_block) {
8609   // Figure out the correct compare values and jump conditions.
8610   // Handle the first compare/branch as a special case because it might
8611   // jump to the default case.
8612   DCHECK_GT(num_entries, 2u);
8613   Condition first_condition;
8614   uint32_t index;
8615   const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
8616   if (lower_bound != 0) {
8617     first_condition = kLess;
8618     __ cmpl(value_reg, Immediate(lower_bound));
8619     __ j(first_condition, codegen_->GetLabelOf(default_block));
8620     __ j(kEqual, codegen_->GetLabelOf(successors[0]));
8621 
8622     index = 1;
8623   } else {
8624     // Handle all the compare/jumps below.
8625     first_condition = kBelow;
8626     index = 0;
8627   }
8628 
8629   // Handle the rest of the compare/jumps.
8630   for (; index + 1 < num_entries; index += 2) {
8631     int32_t compare_to_value = lower_bound + index + 1;
8632     __ cmpl(value_reg, Immediate(compare_to_value));
8633     // Jump to successors[index] if value < case_value[index].
8634     __ j(first_condition, codegen_->GetLabelOf(successors[index]));
8635     // Jump to successors[index + 1] if value == case_value[index + 1].
8636     __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
8637   }
8638 
8639   if (index != num_entries) {
8640     // There is an odd number of entries. Handle the last one.
8641     DCHECK_EQ(index + 1, num_entries);
8642     __ cmpl(value_reg, Immediate(lower_bound + index));
8643     __ j(kEqual, codegen_->GetLabelOf(successors[index]));
8644   }
8645 
8646   // And the default for any other value.
8647   if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
8648     __ jmp(codegen_->GetLabelOf(default_block));
8649   }
8650 }
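// Worked example (illustrative): for a packed switch with lower_bound == 0 and
// num_entries == 3 (cases 0, 1 and 2), the cascade generated above is roughly:
//
//   cmpl value, 1
//   jb   <case 0>     // unsigned "below 1" can only mean value == 0
//   je   <case 1>
//   cmpl value, 2
//   je   <case 2>
//   jmp  <default>    // omitted if the default block is the fall-through successor
//
// With a non-zero lower_bound, a signed `jl <default>` range guard followed by
// `je <case 0>` is emitted first instead.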
8651 
8652 void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8653   int32_t lower_bound = switch_instr->GetStartValue();
8654   uint32_t num_entries = switch_instr->GetNumEntries();
8655   LocationSummary* locations = switch_instr->GetLocations();
8656   Register value_reg = locations->InAt(0).AsRegister<Register>();
8657 
8658   GenPackedSwitchWithCompares(value_reg,
8659                               lower_bound,
8660                               num_entries,
8661                               switch_instr->GetBlock(),
8662                               switch_instr->GetDefaultBlock());
8663 }
8664 
8665 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8666   LocationSummary* locations =
8667       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8668   locations->SetInAt(0, Location::RequiresRegister());
8669 
8670   // Constant area pointer.
8671   locations->SetInAt(1, Location::RequiresRegister());
8672 
8673   // And the temporary we need.
8674   locations->AddTemp(Location::RequiresRegister());
8675 }
8676 
8677 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8678   int32_t lower_bound = switch_instr->GetStartValue();
8679   uint32_t num_entries = switch_instr->GetNumEntries();
8680   LocationSummary* locations = switch_instr->GetLocations();
8681   Register value_reg = locations->InAt(0).AsRegister<Register>();
8682   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
8683 
8684   if (num_entries <= kPackedSwitchJumpTableThreshold) {
8685     GenPackedSwitchWithCompares(value_reg,
8686                                 lower_bound,
8687                                 num_entries,
8688                                 switch_instr->GetBlock(),
8689                                 default_block);
8690     return;
8691   }
8692 
8693   // Use a jump table placed in the constant area.
8694   Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
8695   Register constant_area = locations->InAt(1).AsRegister<Register>();
8696 
8697   // Remove the bias, if needed.
8698   if (lower_bound != 0) {
8699     __ leal(temp_reg, Address(value_reg, -lower_bound));
8700     value_reg = temp_reg;
8701   }
8702 
8703   // Is the value in range?
8704   DCHECK_GE(num_entries, 1u);
8705   __ cmpl(value_reg, Immediate(num_entries - 1));
8706   __ j(kAbove, codegen_->GetLabelOf(default_block));
8707 
8708   // We are in the range of the table.
8709   // Load (target-constant_area) from the jump table, indexing by the value.
8710   __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));
8711 
8712   // Compute the actual target address by adding in constant_area.
8713   __ addl(temp_reg, constant_area);
8714 
8715   // And jump.
8716   __ jmp(temp_reg);
8717 }
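// Illustrative sketch of the jump-table dispatch emitted above, assuming a
// non-zero lower_bound and that value_reg, constant_area and temp_reg are EAX,
// EBX and ECX respectively:
//
//   leal ecx, [eax - lower_bound]           // remove the bias
//   cmpl ecx, num_entries - 1
//   ja   <default>                          // also catches values below the range
//   movl ecx, [ebx + ecx*4 + <table disp>]  // disp patched by JumpTableRIPFixup
//   addl ecx, ebx                           // entries hold target - base address
//   jmp  ecx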
8718 
8719 void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
8720     HX86ComputeBaseMethodAddress* insn) {
8721   LocationSummary* locations =
8722       new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
8723   locations->SetOut(Location::RequiresRegister());
8724 }
8725 
8726 void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress(
8727     HX86ComputeBaseMethodAddress* insn) {
8728   LocationSummary* locations = insn->GetLocations();
8729   Register reg = locations->Out().AsRegister<Register>();
8730 
8731   // Generate call to next instruction.
8732   Label next_instruction;
8733   __ call(&next_instruction);
8734   __ Bind(&next_instruction);
8735 
8736   // Remember this offset for later use with the constant area.
8737   codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize());
8738 
8739   // Grab the return address off the stack.
8740   __ popl(reg);
8741 }
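// The call/pop pair above is the standard 32-bit x86 idiom for materializing
// the current instruction pointer, since there is no direct EIP-relative
// addressing:
//
//   call <next_instruction>  // pushes the return address, i.e. &next_instruction
//  next_instruction:
//   popl reg                 // reg now holds that address at run time
//
// Constant-area accesses and jump-table entries are later patched as
// displacements relative to this recorded point (see RIPFixup below).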
8742 
8743 void LocationsBuilderX86::VisitX86LoadFromConstantTable(
8744     HX86LoadFromConstantTable* insn) {
8745   LocationSummary* locations =
8746       new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
8747 
8748   locations->SetInAt(0, Location::RequiresRegister());
8749   locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));
8750 
8751   // If the value does not need to be materialized, we only need the inputs to be set.
8752   if (insn->IsEmittedAtUseSite()) {
8753     return;
8754   }
8755 
8756   switch (insn->GetType()) {
8757     case DataType::Type::kFloat32:
8758     case DataType::Type::kFloat64:
8759       locations->SetOut(Location::RequiresFpuRegister());
8760       break;
8761 
8762     case DataType::Type::kInt32:
8763       locations->SetOut(Location::RequiresRegister());
8764       break;
8765 
8766     default:
8767       LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
8768   }
8769 }
8770 
8771 void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
8772   if (insn->IsEmittedAtUseSite()) {
8773     return;
8774   }
8775 
8776   LocationSummary* locations = insn->GetLocations();
8777   Location out = locations->Out();
8778   Register const_area = locations->InAt(0).AsRegister<Register>();
8779   HConstant* value = insn->GetConstant();
8780 
8781   switch (insn->GetType()) {
8782     case DataType::Type::kFloat32:
8783       __ movss(out.AsFpuRegister<XmmRegister>(),
8784                codegen_->LiteralFloatAddress(
8785                   value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
8786       break;
8787 
8788     case DataType::Type::kFloat64:
8789       __ movsd(out.AsFpuRegister<XmmRegister>(),
8790                codegen_->LiteralDoubleAddress(
8791                   value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
8792       break;
8793 
8794     case DataType::Type::kInt32:
8795       __ movl(out.AsRegister<Register>(),
8796               codegen_->LiteralInt32Address(
8797                   value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
8798       break;
8799 
8800     default:
8801       LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
8802   }
8803 }
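// Illustrative sketch: loading a float constant through the constant area,
// assuming the base method address is in EBX and the result goes to XMM0,
// comes out as
//
//   movss xmm0, [ebx + <disp>]
//
// where <disp> starts as kPlaceholder32BitOffset and is later rewritten by
// RIPFixup::Process() to the distance between the literal's slot in the
// constant area and the recorded base method address.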
8804 
8805 /**
8806  * Class to handle late fixup of offsets into constant area.
8807  */
8808 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
8809  public:
8810   RIPFixup(CodeGeneratorX86& codegen,
8811            HX86ComputeBaseMethodAddress* base_method_address,
8812            size_t offset)
8813       : codegen_(&codegen),
8814         base_method_address_(base_method_address),
8815         offset_into_constant_area_(offset) {}
8816 
8817  protected:
8818   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
8819 
8820   CodeGeneratorX86* codegen_;
8821   HX86ComputeBaseMethodAddress* base_method_address_;
8822 
8823  private:
8824   void Process(const MemoryRegion& region, int pos) override {
8825     // Patch the correct offset for the instruction.  The place to patch is the
8826     // last 4 bytes of the instruction.
8827     // The value to patch is the distance of the target offset in the constant area
8828     // from the address computed by the HX86ComputeBaseMethodAddress instruction.
8829     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
8830     int32_t relative_position =
8831         constant_offset - codegen_->GetMethodAddressOffset(base_method_address_);
8832 
8833     // Patch in the right value.
8834     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
8835   }
8836 
8837   // Location in constant area that the fixup refers to.
8838   int32_t offset_into_constant_area_;
8839 };
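// Worked example with hypothetical offsets: if the constant area starts at
// code offset 0x400, the literal sits 0x10 bytes into it, and the base method
// address (the call/pop above) was recorded at code offset 0x20, Process()
// rewrites the last 4 bytes of the instruction with
//   relative_position = (0x400 + 0x10) - 0x20 = 0x3F0,
// so the runtime access becomes [base_reg + 0x3F0].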
8840 
8841 /**
8842  * Class to handle late fixup of offsets to a jump table that will be created in the
8843  * constant area.
8844  */
8845 class JumpTableRIPFixup : public RIPFixup {
8846  public:
8847   JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
8848       : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)),
8849         switch_instr_(switch_instr) {}
8850 
8851   void CreateJumpTable() {
8852     X86Assembler* assembler = codegen_->GetAssembler();
8853 
8854     // Ensure that the reference to the jump table has the correct offset.
8855     const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
8856     SetOffset(offset_in_constant_table);
8857 
8858     // The label values in the jump table are computed relative to the
8859     // instruction addressing the constant area.
8860     const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_);
8861 
8862     // Populate the jump table with the correct target offsets.
8863     int32_t num_entries = switch_instr_->GetNumEntries();
8864     HBasicBlock* block = switch_instr_->GetBlock();
8865     const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
8866     // Each entry is the target's code offset minus the base method address offset.
8867     for (int32_t i = 0; i < num_entries; i++) {
8868       HBasicBlock* b = successors[i];
8869       Label* l = codegen_->GetLabelOf(b);
8870       DCHECK(l->IsBound());
8871       int32_t offset_to_block = l->Position() - relative_offset;
8872       assembler->AppendInt32(offset_to_block);
8873     }
8874   }
8875 
8876  private:
8877   const HX86PackedSwitch* switch_instr_;
8878 };
8879 
8880 void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
8881   // Generate the constant area if needed.
8882   X86Assembler* assembler = GetAssembler();
8883 
8884   if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
8885     // Align to a 4-byte boundary to reduce cache misses, as the data consists of
8886     // 4- and 8-byte values.
8887     assembler->Align(4, 0);
8888     constant_area_start_ = assembler->CodeSize();
8889 
8890     // Populate any jump tables.
8891     for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
8892       jump_table->CreateJumpTable();
8893     }
8894 
8895     // And now add the constant area to the generated code.
8896     assembler->AddConstantArea();
8897   }
8898 
8899   // And finish up.
8900   CodeGenerator::Finalize(allocator);
8901 }
8902 
8903 Address CodeGeneratorX86::LiteralDoubleAddress(double v,
8904                                                HX86ComputeBaseMethodAddress* method_base,
8905                                                Register reg) {
8906   AssemblerFixup* fixup =
8907       new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddDouble(v));
8908   return Address(reg, kPlaceholder32BitOffset, fixup);
8909 }
8910 
8911 Address CodeGeneratorX86::LiteralFloatAddress(float v,
8912                                               HX86ComputeBaseMethodAddress* method_base,
8913                                               Register reg) {
8914   AssemblerFixup* fixup =
8915       new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddFloat(v));
8916   return Address(reg, kPlaceholder32BitOffset, fixup);
8917 }
8918 
8919 Address CodeGeneratorX86::LiteralInt32Address(int32_t v,
8920                                               HX86ComputeBaseMethodAddress* method_base,
8921                                               Register reg) {
8922   AssemblerFixup* fixup =
8923       new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt32(v));
8924   return Address(reg, kPlaceholder32BitOffset, fixup);
8925 }
8926 
8927 Address CodeGeneratorX86::LiteralInt64Address(int64_t v,
8928                                               HX86ComputeBaseMethodAddress* method_base,
8929                                               Register reg) {
8930   AssemblerFixup* fixup =
8931       new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt64(v));
8932   return Address(reg, kPlaceholder32BitOffset, fixup);
8933 }
8934 
8935 void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
8936   if (value == 0) {
8937     __ xorl(dest, dest);
8938   } else {
8939     __ movl(dest, Immediate(value));
8940   }
8941 }
8942 
8943 void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
8944   if (value == 0) {
8945     __ testl(dest, dest);
8946   } else {
8947     __ cmpl(dest, Immediate(value));
8948   }
8949 }
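// The two helpers above use the classic x86 encodings for zero: `xorl reg, reg`
// is shorter than `movl reg, 0` and is recognized as a zeroing idiom, and
// `testl reg, reg` sets the same flags as `cmpl reg, 0` with a shorter
// encoding. Note that, unlike `movl`, `xorl` clobbers the condition flags, so
// Load32BitValue() is only safe where the previous flag values are dead.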
8950 
8951 void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
8952   Register lhs_reg = lhs.AsRegister<Register>();
8953   GenerateIntCompare(lhs_reg, rhs);
8954 }
8955 
8956 void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
8957   if (rhs.IsConstant()) {
8958     int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
8959     Compare32BitValue(lhs, value);
8960   } else if (rhs.IsStackSlot()) {
8961     __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
8962   } else {
8963     __ cmpl(lhs, rhs.AsRegister<Register>());
8964   }
8965 }
8966 
8967 Address CodeGeneratorX86::ArrayAddress(Register obj,
8968                                        Location index,
8969                                        ScaleFactor scale,
8970                                        uint32_t data_offset) {
8971   return index.IsConstant() ?
8972       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
8973       Address(obj, index.AsRegister<Register>(), scale, data_offset);
8974 }
8975 
8976 Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
8977                                            Register reg,
8978                                            Register value) {
8979   // Create a fixup that will be used to emit and address the jump table.
8980   JumpTableRIPFixup* table_fixup =
8981       new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
8982 
8983   // Record the fixup so the jump table gets populated in Finalize().
8984   fixups_to_jump_tables_.push_back(table_fixup);
8985 
8986   // We want a scaled address, as we are extracting the correct offset from the table.
8987   return Address(reg, value, TIMES_4, kPlaceholder32BitOffset, table_fixup);
8988 }
8989 
8990 // TODO: target as memory.
8991 void CodeGeneratorX86::MoveFromReturnRegister(Location target, DataType::Type type) {
8992   if (!target.IsValid()) {
8993     DCHECK_EQ(type, DataType::Type::kVoid);
8994     return;
8995   }
8996 
8997   DCHECK_NE(type, DataType::Type::kVoid);
8998 
8999   Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type);
9000   if (target.Equals(return_loc)) {
9001     return;
9002   }
9003 
9004   // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
9005   //       with the else branch.
9006   if (type == DataType::Type::kInt64) {
9007     HParallelMove parallel_move(GetGraph()->GetAllocator());
9008     parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), DataType::Type::kInt32, nullptr);
9009     parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), DataType::Type::kInt32, nullptr);
9010     GetMoveResolver()->EmitNativeCode(&parallel_move);
9011   } else {
9012     // Let the parallel move resolver take care of all of this.
9013     HParallelMove parallel_move(GetGraph()->GetAllocator());
9014     parallel_move.AddMove(return_loc, target, type, nullptr);
9015     GetMoveResolver()->EmitNativeCode(&parallel_move);
9016   }
9017 }
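// Example: a kInt64 result is returned in the EDX:EAX register pair on x86, so
// it is moved to its target as two independent 32-bit moves (low and high
// halves) handed to the parallel move resolver, which takes care of any
// overlap between the source pair and the target locations.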
9018 
9019 void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
9020                                        const uint8_t* roots_data,
9021                                        const PatchInfo<Label>& info,
9022                                        uint64_t index_in_table) const {
9023   uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
9024   uintptr_t address =
9025       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
9026   using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
9027   reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
9028      dchecked_integral_cast<uint32_t>(address);
9029 }
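// In other words: the string/class load emitted for a JIT root ends in a
// 32-bit absolute address field, located here via info.label and the
// adjustment above; this method rewrites those 4 bytes with the address of the
// root's slot in the JIT roots table, so the compiled code reads the root
// straight out of that table at run time.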
9030 
9031 void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
9032   for (const PatchInfo<Label>& info : jit_string_patches_) {
9033     StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
9034     uint64_t index_in_table = GetJitStringRootIndex(string_reference);
9035     PatchJitRootUse(code, roots_data, info, index_in_table);
9036   }
9037 
9038   for (const PatchInfo<Label>& info : jit_class_patches_) {
9039     TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
9040     uint64_t index_in_table = GetJitClassRootIndex(type_reference);
9041     PatchJitRootUse(code, roots_data, info, index_in_table);
9042   }
9043 }
9044 
9045 void LocationsBuilderX86::VisitIntermediateAddress(HIntermediateAddress* instruction
9046                                                    ATTRIBUTE_UNUSED) {
9047   LOG(FATAL) << "Unreachable";
9048 }
9049 
9050 void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* instruction
9051                                                            ATTRIBUTE_UNUSED) {
9052   LOG(FATAL) << "Unreachable";
9053 }
9054 
9055 bool LocationsBuilderX86::CpuHasAvxFeatureFlag() {
9056   return codegen_->GetInstructionSetFeatures().HasAVX();
9057 }
9058 bool LocationsBuilderX86::CpuHasAvx2FeatureFlag() {
9059   return codegen_->GetInstructionSetFeatures().HasAVX2();
9060 }
9061 bool InstructionCodeGeneratorX86::CpuHasAvxFeatureFlag() {
9062   return codegen_->GetInstructionSetFeatures().HasAVX();
9063 }
9064 bool InstructionCodeGeneratorX86::CpuHasAvx2FeatureFlag() {
9065   return codegen_->GetInstructionSetFeatures().HasAVX2();
9066 }
9067 
9068 #undef __
9069 
9070 }  // namespace x86
9071 }  // namespace art
9072