• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86.h"
18 
19 #include "art_method-inl.h"
20 #include "class_table.h"
21 #include "code_generator_utils.h"
22 #include "compiled_method.h"
23 #include "entrypoints/quick/quick_entrypoints.h"
24 #include "entrypoints/quick/quick_entrypoints_enum.h"
25 #include "gc/accounting/card_table.h"
26 #include "gc/space/image_space.h"
27 #include "heap_poisoning.h"
28 #include "intrinsics.h"
29 #include "intrinsics_x86.h"
30 #include "jit/profiling_info.h"
31 #include "linker/linker_patch.h"
32 #include "lock_word.h"
33 #include "mirror/array-inl.h"
34 #include "mirror/class-inl.h"
35 #include "scoped_thread_state_change-inl.h"
36 #include "thread.h"
37 #include "utils/assembler.h"
38 #include "utils/stack_checks.h"
39 #include "utils/x86/assembler_x86.h"
40 #include "utils/x86/managed_register_x86.h"
41 
42 namespace art {
43 
44 template<class MirrorType>
45 class GcRoot;
46 
47 namespace x86 {
48 
// Stack offset of the current-method slot (0).
// NOTE(review): presumably relative to ESP on frame entry — confirm at use sites.
static constexpr int kCurrentMethodStackOffset = 0;
// Register that holds the method argument (EAX).
static constexpr Register kMethodRegisterArgument = EAX;
// Core registers listed as callee-saves for this code generator.
static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };

// Mask with only bit 10 set (0x400).
// NOTE(review): by its name this selects the C2 condition flag of the x87 status word —
// confirm at use sites.
static constexpr int kC2ConditionMask = 0x400;

// Register number 8.
// NOTE(review): presumably a pseudo-register outside the real core registers, standing in
// for the return address — confirm at use sites.
static constexpr int kFakeReturnRegister = Register(8);

// Bit pattern of a quiet NaN in IEEE-754 double precision (exponent all ones, top mantissa
// bit set).
static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
// Bit pattern of a quiet NaN in IEEE-754 single precision (exponent all ones, top mantissa
// bit set).
static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
59 
OneRegInReferenceOutSaveEverythingCallerSaves()60 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
61   InvokeRuntimeCallingConvention calling_convention;
62   RegisterSet caller_saves = RegisterSet::Empty();
63   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
64   // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
65   // that the the kPrimNot result register is the same as the first argument register.
66   return caller_saves;
67 }
68 
// Shorthand written as `__ insn(...)`: expands to a member access on the X86Assembler taken
// from the variable named `codegen` in the enclosing scope, so such a variable must be visible
// wherever the macro is used.
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT
// Int32 value of the quick entrypoint offset for `x`, computed for kX86PointerSize.
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
72 
73 class NullCheckSlowPathX86 : public SlowPathCode {
74  public:
NullCheckSlowPathX86(HNullCheck * instruction)75   explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}
76 
EmitNativeCode(CodeGenerator * codegen)77   void EmitNativeCode(CodeGenerator* codegen) override {
78     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
79     __ Bind(GetEntryLabel());
80     if (instruction_->CanThrowIntoCatchBlock()) {
81       // Live registers will be restored in the catch block if caught.
82       SaveLiveRegisters(codegen, instruction_->GetLocations());
83     }
84     x86_codegen->InvokeRuntime(kQuickThrowNullPointer,
85                                instruction_,
86                                instruction_->GetDexPc(),
87                                this);
88     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
89   }
90 
IsFatal() const91   bool IsFatal() const override { return true; }
92 
GetDescription() const93   const char* GetDescription() const override { return "NullCheckSlowPathX86"; }
94 
95  private:
96   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
97 };
98 
99 class DivZeroCheckSlowPathX86 : public SlowPathCode {
100  public:
DivZeroCheckSlowPathX86(HDivZeroCheck * instruction)101   explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
102 
EmitNativeCode(CodeGenerator * codegen)103   void EmitNativeCode(CodeGenerator* codegen) override {
104     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
105     __ Bind(GetEntryLabel());
106     x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
107     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
108   }
109 
IsFatal() const110   bool IsFatal() const override { return true; }
111 
GetDescription() const112   const char* GetDescription() const override { return "DivZeroCheckSlowPathX86"; }
113 
114  private:
115   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
116 };
117 
118 class DivRemMinusOneSlowPathX86 : public SlowPathCode {
119  public:
DivRemMinusOneSlowPathX86(HInstruction * instruction,Register reg,bool is_div)120   DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
121       : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}
122 
EmitNativeCode(CodeGenerator * codegen)123   void EmitNativeCode(CodeGenerator* codegen) override {
124     __ Bind(GetEntryLabel());
125     if (is_div_) {
126       __ negl(reg_);
127     } else {
128       __ movl(reg_, Immediate(0));
129     }
130     __ jmp(GetExitLabel());
131   }
132 
GetDescription() const133   const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86"; }
134 
135  private:
136   Register reg_;
137   bool is_div_;
138   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
139 };
140 
141 class BoundsCheckSlowPathX86 : public SlowPathCode {
142  public:
BoundsCheckSlowPathX86(HBoundsCheck * instruction)143   explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}
144 
EmitNativeCode(CodeGenerator * codegen)145   void EmitNativeCode(CodeGenerator* codegen) override {
146     LocationSummary* locations = instruction_->GetLocations();
147     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
148     __ Bind(GetEntryLabel());
149     // We're moving two locations to locations that could overlap, so we need a parallel
150     // move resolver.
151     if (instruction_->CanThrowIntoCatchBlock()) {
152       // Live registers will be restored in the catch block if caught.
153       SaveLiveRegisters(codegen, instruction_->GetLocations());
154     }
155 
156     // Are we using an array length from memory?
157     HInstruction* array_length = instruction_->InputAt(1);
158     Location length_loc = locations->InAt(1);
159     InvokeRuntimeCallingConvention calling_convention;
160     if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
161       // Load the array length into our temporary.
162       HArrayLength* length = array_length->AsArrayLength();
163       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
164       Location array_loc = array_length->GetLocations()->InAt(0);
165       Address array_len(array_loc.AsRegister<Register>(), len_offset);
166       length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
167       // Check for conflicts with index.
168       if (length_loc.Equals(locations->InAt(0))) {
169         // We know we aren't using parameter 2.
170         length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
171       }
172       __ movl(length_loc.AsRegister<Register>(), array_len);
173       if (mirror::kUseStringCompression && length->IsStringLength()) {
174         __ shrl(length_loc.AsRegister<Register>(), Immediate(1));
175       }
176     }
177     x86_codegen->EmitParallelMoves(
178         locations->InAt(0),
179         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
180         DataType::Type::kInt32,
181         length_loc,
182         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
183         DataType::Type::kInt32);
184     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
185         ? kQuickThrowStringBounds
186         : kQuickThrowArrayBounds;
187     x86_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
188     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
189     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
190   }
191 
IsFatal() const192   bool IsFatal() const override { return true; }
193 
GetDescription() const194   const char* GetDescription() const override { return "BoundsCheckSlowPathX86"; }
195 
196  private:
197   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
198 };
199 
200 class SuspendCheckSlowPathX86 : public SlowPathCode {
201  public:
SuspendCheckSlowPathX86(HSuspendCheck * instruction,HBasicBlock * successor)202   SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
203       : SlowPathCode(instruction), successor_(successor) {}
204 
EmitNativeCode(CodeGenerator * codegen)205   void EmitNativeCode(CodeGenerator* codegen) override {
206     LocationSummary* locations = instruction_->GetLocations();
207     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
208     __ Bind(GetEntryLabel());
209     SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
210     x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
211     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
212     RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
213     if (successor_ == nullptr) {
214       __ jmp(GetReturnLabel());
215     } else {
216       __ jmp(x86_codegen->GetLabelOf(successor_));
217     }
218   }
219 
GetReturnLabel()220   Label* GetReturnLabel() {
221     DCHECK(successor_ == nullptr);
222     return &return_label_;
223   }
224 
GetSuccessor() const225   HBasicBlock* GetSuccessor() const {
226     return successor_;
227   }
228 
GetDescription() const229   const char* GetDescription() const override { return "SuspendCheckSlowPathX86"; }
230 
231  private:
232   HBasicBlock* const successor_;
233   Label return_label_;
234 
235   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
236 };
237 
238 class LoadStringSlowPathX86 : public SlowPathCode {
239  public:
LoadStringSlowPathX86(HLoadString * instruction)240   explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {}
241 
EmitNativeCode(CodeGenerator * codegen)242   void EmitNativeCode(CodeGenerator* codegen) override {
243     LocationSummary* locations = instruction_->GetLocations();
244     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
245 
246     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
247     __ Bind(GetEntryLabel());
248     SaveLiveRegisters(codegen, locations);
249 
250     InvokeRuntimeCallingConvention calling_convention;
251     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
252     __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index.index_));
253     x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
254     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
255     x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
256     RestoreLiveRegisters(codegen, locations);
257 
258     __ jmp(GetExitLabel());
259   }
260 
GetDescription() const261   const char* GetDescription() const override { return "LoadStringSlowPathX86"; }
262 
263  private:
264   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
265 };
266 
267 class LoadClassSlowPathX86 : public SlowPathCode {
268  public:
LoadClassSlowPathX86(HLoadClass * cls,HInstruction * at)269   LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at)
270       : SlowPathCode(at), cls_(cls) {
271     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
272     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
273   }
274 
EmitNativeCode(CodeGenerator * codegen)275   void EmitNativeCode(CodeGenerator* codegen) override {
276     LocationSummary* locations = instruction_->GetLocations();
277     Location out = locations->Out();
278     const uint32_t dex_pc = instruction_->GetDexPc();
279     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
280     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
281 
282     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
283     __ Bind(GetEntryLabel());
284     SaveLiveRegisters(codegen, locations);
285 
286     InvokeRuntimeCallingConvention calling_convention;
287     if (must_resolve_type) {
288       DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile()));
289       dex::TypeIndex type_index = cls_->GetTypeIndex();
290       __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
291       x86_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
292       CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
293       // If we also must_do_clinit, the resolved type is now in the correct register.
294     } else {
295       DCHECK(must_do_clinit);
296       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
297       x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source);
298     }
299     if (must_do_clinit) {
300       x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
301       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
302     }
303 
304     // Move the class to the desired location.
305     if (out.IsValid()) {
306       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
307       x86_codegen->Move32(out, Location::RegisterLocation(EAX));
308     }
309     RestoreLiveRegisters(codegen, locations);
310     __ jmp(GetExitLabel());
311   }
312 
GetDescription() const313   const char* GetDescription() const override { return "LoadClassSlowPathX86"; }
314 
315  private:
316   // The class this slow path will load.
317   HLoadClass* const cls_;
318 
319   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
320 };
321 
322 class TypeCheckSlowPathX86 : public SlowPathCode {
323  public:
TypeCheckSlowPathX86(HInstruction * instruction,bool is_fatal)324   TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
325       : SlowPathCode(instruction), is_fatal_(is_fatal) {}
326 
EmitNativeCode(CodeGenerator * codegen)327   void EmitNativeCode(CodeGenerator* codegen) override {
328     LocationSummary* locations = instruction_->GetLocations();
329     DCHECK(instruction_->IsCheckCast()
330            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
331 
332     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
333     __ Bind(GetEntryLabel());
334 
335     if (kPoisonHeapReferences &&
336         instruction_->IsCheckCast() &&
337         instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
338       // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
339       __ UnpoisonHeapReference(locations->InAt(1).AsRegister<Register>());
340     }
341 
342     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
343       SaveLiveRegisters(codegen, locations);
344     }
345 
346     // We're moving two locations to locations that could overlap, so we need a parallel
347     // move resolver.
348     InvokeRuntimeCallingConvention calling_convention;
349     x86_codegen->EmitParallelMoves(locations->InAt(0),
350                                    Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
351                                    DataType::Type::kReference,
352                                    locations->InAt(1),
353                                    Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
354                                    DataType::Type::kReference);
355     if (instruction_->IsInstanceOf()) {
356       x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
357                                  instruction_,
358                                  instruction_->GetDexPc(),
359                                  this);
360       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
361     } else {
362       DCHECK(instruction_->IsCheckCast());
363       x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
364                                  instruction_,
365                                  instruction_->GetDexPc(),
366                                  this);
367       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
368     }
369 
370     if (!is_fatal_) {
371       if (instruction_->IsInstanceOf()) {
372         x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
373       }
374       RestoreLiveRegisters(codegen, locations);
375 
376       __ jmp(GetExitLabel());
377     }
378   }
379 
GetDescription() const380   const char* GetDescription() const override { return "TypeCheckSlowPathX86"; }
IsFatal() const381   bool IsFatal() const override { return is_fatal_; }
382 
383  private:
384   const bool is_fatal_;
385 
386   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
387 };
388 
389 class DeoptimizationSlowPathX86 : public SlowPathCode {
390  public:
DeoptimizationSlowPathX86(HDeoptimize * instruction)391   explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
392     : SlowPathCode(instruction) {}
393 
EmitNativeCode(CodeGenerator * codegen)394   void EmitNativeCode(CodeGenerator* codegen) override {
395     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
396     __ Bind(GetEntryLabel());
397     LocationSummary* locations = instruction_->GetLocations();
398     SaveLiveRegisters(codegen, locations);
399     InvokeRuntimeCallingConvention calling_convention;
400     x86_codegen->Load32BitValue(
401         calling_convention.GetRegisterAt(0),
402         static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
403     x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
404     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
405   }
406 
GetDescription() const407   const char* GetDescription() const override { return "DeoptimizationSlowPathX86"; }
408 
409  private:
410   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
411 };
412 
413 class ArraySetSlowPathX86 : public SlowPathCode {
414  public:
ArraySetSlowPathX86(HInstruction * instruction)415   explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
416 
EmitNativeCode(CodeGenerator * codegen)417   void EmitNativeCode(CodeGenerator* codegen) override {
418     LocationSummary* locations = instruction_->GetLocations();
419     __ Bind(GetEntryLabel());
420     SaveLiveRegisters(codegen, locations);
421 
422     InvokeRuntimeCallingConvention calling_convention;
423     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
424     parallel_move.AddMove(
425         locations->InAt(0),
426         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
427         DataType::Type::kReference,
428         nullptr);
429     parallel_move.AddMove(
430         locations->InAt(1),
431         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
432         DataType::Type::kInt32,
433         nullptr);
434     parallel_move.AddMove(
435         locations->InAt(2),
436         Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
437         DataType::Type::kReference,
438         nullptr);
439     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
440 
441     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
442     x86_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
443     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
444     RestoreLiveRegisters(codegen, locations);
445     __ jmp(GetExitLabel());
446   }
447 
GetDescription() const448   const char* GetDescription() const override { return "ArraySetSlowPathX86"; }
449 
450  private:
451   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
452 };
453 
454 // Slow path marking an object reference `ref` during a read
455 // barrier. The field `obj.field` in the object `obj` holding this
456 // reference does not get updated by this slow path after marking (see
457 // ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
458 //
459 // This means that after the execution of this slow path, `ref` will
460 // always be up-to-date, but `obj.field` may not; i.e., after the
461 // flip, `ref` will be a to-space reference, but `obj.field` will
462 // probably still be a from-space reference (unless it gets updated by
463 // another thread, or if another thread installed another object
464 // reference (different from `ref`) in `obj.field`).
465 class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
466  public:
ReadBarrierMarkSlowPathX86(HInstruction * instruction,Location ref,bool unpoison_ref_before_marking)467   ReadBarrierMarkSlowPathX86(HInstruction* instruction,
468                              Location ref,
469                              bool unpoison_ref_before_marking)
470       : SlowPathCode(instruction),
471         ref_(ref),
472         unpoison_ref_before_marking_(unpoison_ref_before_marking) {
473     DCHECK(kEmitCompilerReadBarrier);
474   }
475 
GetDescription() const476   const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; }
477 
EmitNativeCode(CodeGenerator * codegen)478   void EmitNativeCode(CodeGenerator* codegen) override {
479     LocationSummary* locations = instruction_->GetLocations();
480     Register ref_reg = ref_.AsRegister<Register>();
481     DCHECK(locations->CanCall());
482     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
483     DCHECK(instruction_->IsInstanceFieldGet() ||
484            instruction_->IsStaticFieldGet() ||
485            instruction_->IsArrayGet() ||
486            instruction_->IsArraySet() ||
487            instruction_->IsLoadClass() ||
488            instruction_->IsLoadString() ||
489            instruction_->IsInstanceOf() ||
490            instruction_->IsCheckCast() ||
491            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
492            (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
493         << "Unexpected instruction in read barrier marking slow path: "
494         << instruction_->DebugName();
495 
496     __ Bind(GetEntryLabel());
497     if (unpoison_ref_before_marking_) {
498       // Object* ref = ref_addr->AsMirrorPtr()
499       __ MaybeUnpoisonHeapReference(ref_reg);
500     }
501     // No need to save live registers; it's taken care of by the
502     // entrypoint. Also, there is no need to update the stack mask,
503     // as this runtime call will not trigger a garbage collection.
504     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
505     DCHECK_NE(ref_reg, ESP);
506     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
507     // "Compact" slow path, saving two moves.
508     //
509     // Instead of using the standard runtime calling convention (input
510     // and output in EAX):
511     //
512     //   EAX <- ref
513     //   EAX <- ReadBarrierMark(EAX)
514     //   ref <- EAX
515     //
516     // we just use rX (the register containing `ref`) as input and output
517     // of a dedicated entrypoint:
518     //
519     //   rX <- ReadBarrierMarkRegX(rX)
520     //
521     int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
522     // This runtime call does not require a stack map.
523     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
524     __ jmp(GetExitLabel());
525   }
526 
527  private:
528   // The location (register) of the marked object reference.
529   const Location ref_;
530   // Should the reference in `ref_` be unpoisoned prior to marking it?
531   const bool unpoison_ref_before_marking_;
532 
533   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
534 };
535 
536 // Slow path marking an object reference `ref` during a read barrier,
537 // and if needed, atomically updating the field `obj.field` in the
538 // object `obj` holding this reference after marking (contrary to
539 // ReadBarrierMarkSlowPathX86 above, which never tries to update
540 // `obj.field`).
541 //
542 // This means that after the execution of this slow path, both `ref`
543 // and `obj.field` will be up-to-date; i.e., after the flip, both will
544 // hold the same to-space reference (unless another thread installed
545 // another object reference (different from `ref`) in `obj.field`).
546 class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
547  public:
ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction * instruction,Location ref,Register obj,const Address & field_addr,bool unpoison_ref_before_marking,Register temp)548   ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
549                                            Location ref,
550                                            Register obj,
551                                            const Address& field_addr,
552                                            bool unpoison_ref_before_marking,
553                                            Register temp)
554       : SlowPathCode(instruction),
555         ref_(ref),
556         obj_(obj),
557         field_addr_(field_addr),
558         unpoison_ref_before_marking_(unpoison_ref_before_marking),
559         temp_(temp) {
560     DCHECK(kEmitCompilerReadBarrier);
561   }
562 
GetDescription() const563   const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
564 
EmitNativeCode(CodeGenerator * codegen)565   void EmitNativeCode(CodeGenerator* codegen) override {
566     LocationSummary* locations = instruction_->GetLocations();
567     Register ref_reg = ref_.AsRegister<Register>();
568     DCHECK(locations->CanCall());
569     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
570     // This slow path is only used by the UnsafeCASObject intrinsic.
571     DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
572         << "Unexpected instruction in read barrier marking and field updating slow path: "
573         << instruction_->DebugName();
574     DCHECK(instruction_->GetLocations()->Intrinsified());
575     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
576 
577     __ Bind(GetEntryLabel());
578     if (unpoison_ref_before_marking_) {
579       // Object* ref = ref_addr->AsMirrorPtr()
580       __ MaybeUnpoisonHeapReference(ref_reg);
581     }
582 
583     // Save the old (unpoisoned) reference.
584     __ movl(temp_, ref_reg);
585 
586     // No need to save live registers; it's taken care of by the
587     // entrypoint. Also, there is no need to update the stack mask,
588     // as this runtime call will not trigger a garbage collection.
589     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
590     DCHECK_NE(ref_reg, ESP);
591     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
592     // "Compact" slow path, saving two moves.
593     //
594     // Instead of using the standard runtime calling convention (input
595     // and output in EAX):
596     //
597     //   EAX <- ref
598     //   EAX <- ReadBarrierMark(EAX)
599     //   ref <- EAX
600     //
601     // we just use rX (the register containing `ref`) as input and output
602     // of a dedicated entrypoint:
603     //
604     //   rX <- ReadBarrierMarkRegX(rX)
605     //
606     int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
607     // This runtime call does not require a stack map.
608     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
609 
610     // If the new reference is different from the old reference,
611     // update the field in the holder (`*field_addr`).
612     //
613     // Note that this field could also hold a different object, if
614     // another thread had concurrently changed it. In that case, the
615     // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
616     // operation below would abort the CAS, leaving the field as-is.
617     NearLabel done;
618     __ cmpl(temp_, ref_reg);
619     __ j(kEqual, &done);
620 
621     // Update the the holder's field atomically.  This may fail if
622     // mutator updates before us, but it's OK.  This is achieved
623     // using a strong compare-and-set (CAS) operation with relaxed
624     // memory synchronization ordering, where the expected value is
625     // the old reference and the desired value is the new reference.
626     // This operation is implemented with a 32-bit LOCK CMPXLCHG
627     // instruction, which requires the expected value (the old
628     // reference) to be in EAX.  Save EAX beforehand, and move the
629     // expected value (stored in `temp_`) into EAX.
630     __ pushl(EAX);
631     __ movl(EAX, temp_);
632 
633     // Convenience aliases.
634     Register base = obj_;
635     Register expected = EAX;
636     Register value = ref_reg;
637 
638     bool base_equals_value = (base == value);
639     if (kPoisonHeapReferences) {
640       if (base_equals_value) {
641         // If `base` and `value` are the same register location, move
642         // `value` to a temporary register.  This way, poisoning
643         // `value` won't invalidate `base`.
644         value = temp_;
645         __ movl(value, base);
646       }
647 
648       // Check that the register allocator did not assign the location
649       // of `expected` (EAX) to `value` nor to `base`, so that heap
650       // poisoning (when enabled) works as intended below.
651       // - If `value` were equal to `expected`, both references would
652       //   be poisoned twice, meaning they would not be poisoned at
653       //   all, as heap poisoning uses address negation.
654       // - If `base` were equal to `expected`, poisoning `expected`
655       //   would invalidate `base`.
656       DCHECK_NE(value, expected);
657       DCHECK_NE(base, expected);
658 
659       __ PoisonHeapReference(expected);
660       __ PoisonHeapReference(value);
661     }
662 
663     __ LockCmpxchgl(field_addr_, value);
664 
665     // If heap poisoning is enabled, we need to unpoison the values
666     // that were poisoned earlier.
667     if (kPoisonHeapReferences) {
668       if (base_equals_value) {
669         // `value` has been moved to a temporary register, no need
670         // to unpoison it.
671       } else {
672         __ UnpoisonHeapReference(value);
673       }
674       // No need to unpoison `expected` (EAX), as it is be overwritten below.
675     }
676 
677     // Restore EAX.
678     __ popl(EAX);
679 
680     __ Bind(&done);
681     __ jmp(GetExitLabel());
682   }
683 
684  private:
685   // The location (register) of the marked object reference.
686   const Location ref_;
687   // The register containing the object holding the marked object reference field.
688   const Register obj_;
689   // The address of the marked reference field.  The base of this address must be `obj_`.
690   const Address field_addr_;
691 
692   // Should the reference in `ref_` be unpoisoned prior to marking it?
693   const bool unpoison_ref_before_marking_;
694 
695   const Register temp_;
696 
697   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
698 };
699 
700 // Slow path generating a read barrier for a heap reference.
701 class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
702  public:
ReadBarrierForHeapReferenceSlowPathX86(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)703   ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
704                                          Location out,
705                                          Location ref,
706                                          Location obj,
707                                          uint32_t offset,
708                                          Location index)
709       : SlowPathCode(instruction),
710         out_(out),
711         ref_(ref),
712         obj_(obj),
713         offset_(offset),
714         index_(index) {
715     DCHECK(kEmitCompilerReadBarrier);
716     // If `obj` is equal to `out` or `ref`, it means the initial object
717     // has been overwritten by (or after) the heap object reference load
718     // to be instrumented, e.g.:
719     //
720     //   __ movl(out, Address(out, offset));
721     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
722     //
723     // In that case, we have lost the information about the original
724     // object, and the emitted read barrier cannot work properly.
725     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
726     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
727   }
728 
EmitNativeCode(CodeGenerator * codegen)729   void EmitNativeCode(CodeGenerator* codegen) override {
730     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
731     LocationSummary* locations = instruction_->GetLocations();
732     Register reg_out = out_.AsRegister<Register>();
733     DCHECK(locations->CanCall());
734     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
735     DCHECK(instruction_->IsInstanceFieldGet() ||
736            instruction_->IsStaticFieldGet() ||
737            instruction_->IsArrayGet() ||
738            instruction_->IsInstanceOf() ||
739            instruction_->IsCheckCast() ||
740            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
741         << "Unexpected instruction in read barrier for heap reference slow path: "
742         << instruction_->DebugName();
743 
744     __ Bind(GetEntryLabel());
745     SaveLiveRegisters(codegen, locations);
746 
747     // We may have to change the index's value, but as `index_` is a
748     // constant member (like other "inputs" of this slow path),
749     // introduce a copy of it, `index`.
750     Location index = index_;
751     if (index_.IsValid()) {
752       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
753       if (instruction_->IsArrayGet()) {
754         // Compute the actual memory offset and store it in `index`.
755         Register index_reg = index_.AsRegister<Register>();
756         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
757         if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
758           // We are about to change the value of `index_reg` (see the
759           // calls to art::x86::X86Assembler::shll and
760           // art::x86::X86Assembler::AddImmediate below), but it has
761           // not been saved by the previous call to
762           // art::SlowPathCode::SaveLiveRegisters, as it is a
763           // callee-save register --
764           // art::SlowPathCode::SaveLiveRegisters does not consider
765           // callee-save registers, as it has been designed with the
766           // assumption that callee-save registers are supposed to be
767           // handled by the called function.  So, as a callee-save
768           // register, `index_reg` _would_ eventually be saved onto
769           // the stack, but it would be too late: we would have
770           // changed its value earlier.  Therefore, we manually save
771           // it here into another freely available register,
772           // `free_reg`, chosen of course among the caller-save
773           // registers (as a callee-save `free_reg` register would
774           // exhibit the same problem).
775           //
776           // Note we could have requested a temporary register from
777           // the register allocator instead; but we prefer not to, as
778           // this is a slow path, and we know we can find a
779           // caller-save register that is available.
780           Register free_reg = FindAvailableCallerSaveRegister(codegen);
781           __ movl(free_reg, index_reg);
782           index_reg = free_reg;
783           index = Location::RegisterLocation(index_reg);
784         } else {
785           // The initial register stored in `index_` has already been
786           // saved in the call to art::SlowPathCode::SaveLiveRegisters
787           // (as it is not a callee-save register), so we can freely
788           // use it.
789         }
790         // Shifting the index value contained in `index_reg` by the scale
791         // factor (2) cannot overflow in practice, as the runtime is
792         // unable to allocate object arrays with a size larger than
793         // 2^26 - 1 (that is, 2^28 - 4 bytes).
794         __ shll(index_reg, Immediate(TIMES_4));
795         static_assert(
796             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
797             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
798         __ AddImmediate(index_reg, Immediate(offset_));
799       } else {
800         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
801         // intrinsics, `index_` is not shifted by a scale factor of 2
802         // (as in the case of ArrayGet), as it is actually an offset
803         // to an object field within an object.
804         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
805         DCHECK(instruction_->GetLocations()->Intrinsified());
806         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
807                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
808             << instruction_->AsInvoke()->GetIntrinsic();
809         DCHECK_EQ(offset_, 0U);
810         DCHECK(index_.IsRegisterPair());
811         // UnsafeGet's offset location is a register pair, the low
812         // part contains the correct offset.
813         index = index_.ToLow();
814       }
815     }
816 
817     // We're moving two or three locations to locations that could
818     // overlap, so we need a parallel move resolver.
819     InvokeRuntimeCallingConvention calling_convention;
820     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
821     parallel_move.AddMove(ref_,
822                           Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
823                           DataType::Type::kReference,
824                           nullptr);
825     parallel_move.AddMove(obj_,
826                           Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
827                           DataType::Type::kReference,
828                           nullptr);
829     if (index.IsValid()) {
830       parallel_move.AddMove(index,
831                             Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
832                             DataType::Type::kInt32,
833                             nullptr);
834       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
835     } else {
836       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
837       __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
838     }
839     x86_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this);
840     CheckEntrypointTypes<
841         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
842     x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
843 
844     RestoreLiveRegisters(codegen, locations);
845     __ jmp(GetExitLabel());
846   }
847 
GetDescription() const848   const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86"; }
849 
850  private:
FindAvailableCallerSaveRegister(CodeGenerator * codegen)851   Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
852     size_t ref = static_cast<int>(ref_.AsRegister<Register>());
853     size_t obj = static_cast<int>(obj_.AsRegister<Register>());
854     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
855       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
856         return static_cast<Register>(i);
857       }
858     }
859     // We shall never fail to find a free caller-save register, as
860     // there are more than two core caller-save registers on x86
861     // (meaning it is possible to find one which is different from
862     // `ref` and `obj`).
863     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
864     LOG(FATAL) << "Could not find a free caller-save register";
865     UNREACHABLE();
866   }
867 
868   const Location out_;
869   const Location ref_;
870   const Location obj_;
871   const uint32_t offset_;
872   // An additional location containing an index to an array.
873   // Only used for HArrayGet and the UnsafeGetObject &
874   // UnsafeGetObjectVolatile intrinsics.
875   const Location index_;
876 
877   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
878 };
879 
880 // Slow path generating a read barrier for a GC root.
881 class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
882  public:
ReadBarrierForRootSlowPathX86(HInstruction * instruction,Location out,Location root)883   ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
884       : SlowPathCode(instruction), out_(out), root_(root) {
885     DCHECK(kEmitCompilerReadBarrier);
886   }
887 
EmitNativeCode(CodeGenerator * codegen)888   void EmitNativeCode(CodeGenerator* codegen) override {
889     LocationSummary* locations = instruction_->GetLocations();
890     Register reg_out = out_.AsRegister<Register>();
891     DCHECK(locations->CanCall());
892     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
893     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
894         << "Unexpected instruction in read barrier for GC root slow path: "
895         << instruction_->DebugName();
896 
897     __ Bind(GetEntryLabel());
898     SaveLiveRegisters(codegen, locations);
899 
900     InvokeRuntimeCallingConvention calling_convention;
901     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
902     x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
903     x86_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
904                                instruction_,
905                                instruction_->GetDexPc(),
906                                this);
907     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
908     x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
909 
910     RestoreLiveRegisters(codegen, locations);
911     __ jmp(GetExitLabel());
912   }
913 
GetDescription() const914   const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86"; }
915 
916  private:
917   const Location out_;
918   const Location root_;
919 
920   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
921 };
922 
// Redefine the `__` shorthand: from here on it expands to an unqualified GetAssembler() call
// (resolved at the point of use) downcast to X86Assembler.
#undef __
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT
926 
X86Condition(IfCondition cond)927 inline Condition X86Condition(IfCondition cond) {
928   switch (cond) {
929     case kCondEQ: return kEqual;
930     case kCondNE: return kNotEqual;
931     case kCondLT: return kLess;
932     case kCondLE: return kLessEqual;
933     case kCondGT: return kGreater;
934     case kCondGE: return kGreaterEqual;
935     case kCondB:  return kBelow;
936     case kCondBE: return kBelowEqual;
937     case kCondA:  return kAbove;
938     case kCondAE: return kAboveEqual;
939   }
940   LOG(FATAL) << "Unreachable";
941   UNREACHABLE();
942 }
943 
944 // Maps signed condition to unsigned condition and FP condition to x86 name.
X86UnsignedOrFPCondition(IfCondition cond)945 inline Condition X86UnsignedOrFPCondition(IfCondition cond) {
946   switch (cond) {
947     case kCondEQ: return kEqual;
948     case kCondNE: return kNotEqual;
949     // Signed to unsigned, and FP to x86 name.
950     case kCondLT: return kBelow;
951     case kCondLE: return kBelowEqual;
952     case kCondGT: return kAbove;
953     case kCondGE: return kAboveEqual;
954     // Unsigned remain unchanged.
955     case kCondB:  return kBelow;
956     case kCondBE: return kBelowEqual;
957     case kCondA:  return kAbove;
958     case kCondAE: return kAboveEqual;
959   }
960   LOG(FATAL) << "Unreachable";
961   UNREACHABLE();
962 }
963 
DumpCoreRegister(std::ostream & stream,int reg) const964 void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const {
965   stream << Register(reg);
966 }
967 
DumpFloatingPointRegister(std::ostream & stream,int reg) const968 void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
969   stream << XmmRegister(reg);
970 }
971 
GetInstructionSetFeatures() const972 const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const {
973   return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
974 }
975 
SaveCoreRegister(size_t stack_index,uint32_t reg_id)976 size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
977   __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
978   return kX86WordSize;
979 }
980 
RestoreCoreRegister(size_t stack_index,uint32_t reg_id)981 size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
982   __ movl(static_cast<Register>(reg_id), Address(ESP, stack_index));
983   return kX86WordSize;
984 }
985 
SaveFloatingPointRegister(size_t stack_index,uint32_t reg_id)986 size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
987   if (GetGraph()->HasSIMD()) {
988     __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
989   } else {
990     __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
991   }
992   return GetSlowPathFPWidth();
993 }
994 
RestoreFloatingPointRegister(size_t stack_index,uint32_t reg_id)995 size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
996   if (GetGraph()->HasSIMD()) {
997     __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
998   } else {
999     __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
1000   }
1001   return GetSlowPathFPWidth();
1002 }
1003 
InvokeRuntime(QuickEntrypointEnum entrypoint,HInstruction * instruction,uint32_t dex_pc,SlowPathCode * slow_path)1004 void CodeGeneratorX86::InvokeRuntime(QuickEntrypointEnum entrypoint,
1005                                      HInstruction* instruction,
1006                                      uint32_t dex_pc,
1007                                      SlowPathCode* slow_path) {
1008   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1009   GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(entrypoint).Int32Value());
1010   if (EntrypointRequiresStackMap(entrypoint)) {
1011     RecordPcInfo(instruction, dex_pc, slow_path);
1012   }
1013 }
1014 
InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,HInstruction * instruction,SlowPathCode * slow_path)1015 void CodeGeneratorX86::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1016                                                            HInstruction* instruction,
1017                                                            SlowPathCode* slow_path) {
1018   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1019   GenerateInvokeRuntime(entry_point_offset);
1020 }
1021 
GenerateInvokeRuntime(int32_t entry_point_offset)1022 void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
1023   __ fs()->call(Address::Absolute(entry_point_offset));
1024 }
1025 
CodeGeneratorX86(HGraph * graph,const CompilerOptions & compiler_options,OptimizingCompilerStats * stats)1026 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
1027                                    const CompilerOptions& compiler_options,
1028                                    OptimizingCompilerStats* stats)
1029     : CodeGenerator(graph,
1030                     kNumberOfCpuRegisters,
1031                     kNumberOfXmmRegisters,
1032                     kNumberOfRegisterPairs,
1033                     ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1034                                         arraysize(kCoreCalleeSaves))
1035                         | (1 << kFakeReturnRegister),
1036                     0,
1037                     compiler_options,
1038                     stats),
1039       block_labels_(nullptr),
1040       location_builder_(graph, this),
1041       instruction_visitor_(graph, this),
1042       move_resolver_(graph->GetAllocator(), this),
1043       assembler_(graph->GetAllocator()),
1044       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1045       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1046       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1047       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1048       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1049       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1050       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1051       jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1052       jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1053       constant_area_start_(-1),
1054       fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1055       method_address_offset_(std::less<uint32_t>(),
1056                              graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1057   // Use a fake return address register to mimic Quick.
1058   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1059 }
1060 
SetupBlockedRegisters() const1061 void CodeGeneratorX86::SetupBlockedRegisters() const {
1062   // Stack register is always reserved.
1063   blocked_core_registers_[ESP] = true;
1064 }
1065 
InstructionCodeGeneratorX86(HGraph * graph,CodeGeneratorX86 * codegen)1066 InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
1067       : InstructionCodeGenerator(graph, codegen),
1068         assembler_(codegen->GetAssembler()),
1069         codegen_(codegen) {}
1070 
DWARFReg(Register reg)1071 static dwarf::Reg DWARFReg(Register reg) {
1072   return dwarf::Reg::X86Core(static_cast<int>(reg));
1073 }
1074 
MaybeIncrementHotness(bool is_frame_entry)1075 void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
1076   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1077     Register reg = EAX;
1078     if (is_frame_entry) {
1079       reg = kMethodRegisterArgument;
1080     } else {
1081       __ pushl(EAX);
1082       __ movl(EAX, Address(ESP, kX86WordSize));
1083     }
1084     NearLabel overflow;
1085     __ cmpw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
1086             Immediate(ArtMethod::MaxCounter()));
1087     __ j(kEqual, &overflow);
1088     __ addw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
1089             Immediate(1));
1090     __ Bind(&overflow);
1091     if (!is_frame_entry) {
1092       __ popl(EAX);
1093     }
1094   }
1095 
1096   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1097     ScopedObjectAccess soa(Thread::Current());
1098     ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
1099     if (info != nullptr) {
1100       uint32_t address = reinterpret_cast32<uint32_t>(info);
1101       NearLabel done;
1102       if (HasEmptyFrame()) {
1103         CHECK(is_frame_entry);
1104         // Alignment
1105         __ subl(ESP, Immediate(8));
1106         __ cfi().AdjustCFAOffset(8);
1107         // We need a temporary. The stub also expects the method at bottom of stack.
1108         __ pushl(EAX);
1109         __ cfi().AdjustCFAOffset(4);
1110         __ movl(EAX, Immediate(address));
1111         __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1112                 Immediate(1));
1113         __ j(kCarryClear, &done);
1114         GenerateInvokeRuntime(
1115             GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
1116         __ Bind(&done);
1117         // We don't strictly require to restore EAX, but this makes the generated
1118         // code easier to reason about.
1119         __ popl(EAX);
1120         __ cfi().AdjustCFAOffset(-4);
1121         __ addl(ESP, Immediate(8));
1122         __ cfi().AdjustCFAOffset(-8);
1123       } else {
1124         if (!RequiresCurrentMethod()) {
1125           CHECK(is_frame_entry);
1126           __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
1127         }
1128         // We need a temporary.
1129         __ pushl(EAX);
1130         __ cfi().AdjustCFAOffset(4);
1131         __ movl(EAX, Immediate(address));
1132         __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1133                 Immediate(1));
1134         __ popl(EAX);  // Put stack as expected before exiting or calling stub.
1135         __ cfi().AdjustCFAOffset(-4);
1136         __ j(kCarryClear, &done);
1137         GenerateInvokeRuntime(
1138             GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
1139         __ Bind(&done);
1140       }
1141     }
1142   }
1143 }
1144 
GenerateFrameEntry()1145 void CodeGeneratorX86::GenerateFrameEntry() {
1146   __ cfi().SetCurrentCFAOffset(kX86WordSize);  // return address
1147   __ Bind(&frame_entry_label_);
1148   bool skip_overflow_check =
1149       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
1150   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1151 
1152   if (!skip_overflow_check) {
1153     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
1154     __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
1155     RecordPcInfo(nullptr, 0);
1156   }
1157 
1158   if (!HasEmptyFrame()) {
1159     for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1160       Register reg = kCoreCalleeSaves[i];
1161       if (allocated_registers_.ContainsCoreRegister(reg)) {
1162         __ pushl(reg);
1163         __ cfi().AdjustCFAOffset(kX86WordSize);
1164         __ cfi().RelOffset(DWARFReg(reg), 0);
1165       }
1166     }
1167 
1168     int adjust = GetFrameSize() - FrameEntrySpillSize();
1169     __ subl(ESP, Immediate(adjust));
1170     __ cfi().AdjustCFAOffset(adjust);
1171     // Save the current method if we need it. Note that we do not
1172     // do this in HCurrentMethod, as the instruction might have been removed
1173     // in the SSA graph.
1174     if (RequiresCurrentMethod()) {
1175       __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
1176     }
1177 
1178     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1179       // Initialize should_deoptimize flag to 0.
1180       __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1181     }
1182   }
1183 
1184   MaybeIncrementHotness(/* is_frame_entry= */ true);
1185 }
1186 
GenerateFrameExit()1187 void CodeGeneratorX86::GenerateFrameExit() {
1188   __ cfi().RememberState();
1189   if (!HasEmptyFrame()) {
1190     int adjust = GetFrameSize() - FrameEntrySpillSize();
1191     __ addl(ESP, Immediate(adjust));
1192     __ cfi().AdjustCFAOffset(-adjust);
1193 
1194     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1195       Register reg = kCoreCalleeSaves[i];
1196       if (allocated_registers_.ContainsCoreRegister(reg)) {
1197         __ popl(reg);
1198         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
1199         __ cfi().Restore(DWARFReg(reg));
1200       }
1201     }
1202   }
1203   __ ret();
1204   __ cfi().RestoreState();
1205   __ cfi().DefCFAOffset(GetFrameSize());
1206 }
1207 
Bind(HBasicBlock * block)1208 void CodeGeneratorX86::Bind(HBasicBlock* block) {
1209   __ Bind(GetLabelOf(block));
1210 }
1211 
GetReturnLocation(DataType::Type type) const1212 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const {
1213   switch (type) {
1214     case DataType::Type::kReference:
1215     case DataType::Type::kBool:
1216     case DataType::Type::kUint8:
1217     case DataType::Type::kInt8:
1218     case DataType::Type::kUint16:
1219     case DataType::Type::kInt16:
1220     case DataType::Type::kUint32:
1221     case DataType::Type::kInt32:
1222       return Location::RegisterLocation(EAX);
1223 
1224     case DataType::Type::kUint64:
1225     case DataType::Type::kInt64:
1226       return Location::RegisterPairLocation(EAX, EDX);
1227 
1228     case DataType::Type::kVoid:
1229       return Location::NoLocation();
1230 
1231     case DataType::Type::kFloat64:
1232     case DataType::Type::kFloat32:
1233       return Location::FpuRegisterLocation(XMM0);
1234   }
1235 
1236   UNREACHABLE();
1237 }
1238 
GetMethodLocation() const1239 Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const {
1240   return Location::RegisterLocation(kMethodRegisterArgument);
1241 }
1242 
GetNextLocation(DataType::Type type)1243 Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type type) {
1244   switch (type) {
1245     case DataType::Type::kReference:
1246     case DataType::Type::kBool:
1247     case DataType::Type::kUint8:
1248     case DataType::Type::kInt8:
1249     case DataType::Type::kUint16:
1250     case DataType::Type::kInt16:
1251     case DataType::Type::kInt32: {
1252       uint32_t index = gp_index_++;
1253       stack_index_++;
1254       if (index < calling_convention.GetNumberOfRegisters()) {
1255         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
1256       } else {
1257         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1258       }
1259     }
1260 
1261     case DataType::Type::kInt64: {
1262       uint32_t index = gp_index_;
1263       gp_index_ += 2;
1264       stack_index_ += 2;
1265       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
1266         X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
1267             calling_convention.GetRegisterPairAt(index));
1268         return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
1269       } else {
1270         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1271       }
1272     }
1273 
1274     case DataType::Type::kFloat32: {
1275       uint32_t index = float_index_++;
1276       stack_index_++;
1277       if (index < calling_convention.GetNumberOfFpuRegisters()) {
1278         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1279       } else {
1280         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
1281       }
1282     }
1283 
1284     case DataType::Type::kFloat64: {
1285       uint32_t index = float_index_++;
1286       stack_index_ += 2;
1287       if (index < calling_convention.GetNumberOfFpuRegisters()) {
1288         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
1289       } else {
1290         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
1291       }
1292     }
1293 
1294     case DataType::Type::kUint32:
1295     case DataType::Type::kUint64:
1296     case DataType::Type::kVoid:
1297       LOG(FATAL) << "Unexpected parameter type " << type;
1298       UNREACHABLE();
1299   }
1300   return Location::NoLocation();
1301 }
1302 
Move32(Location destination,Location source)1303 void CodeGeneratorX86::Move32(Location destination, Location source) {
1304   if (source.Equals(destination)) {
1305     return;
1306   }
1307   if (destination.IsRegister()) {
1308     if (source.IsRegister()) {
1309       __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
1310     } else if (source.IsFpuRegister()) {
1311       __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
1312     } else {
1313       DCHECK(source.IsStackSlot());
1314       __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
1315     }
1316   } else if (destination.IsFpuRegister()) {
1317     if (source.IsRegister()) {
1318       __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
1319     } else if (source.IsFpuRegister()) {
1320       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1321     } else {
1322       DCHECK(source.IsStackSlot());
1323       __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1324     }
1325   } else {
1326     DCHECK(destination.IsStackSlot()) << destination;
1327     if (source.IsRegister()) {
1328       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
1329     } else if (source.IsFpuRegister()) {
1330       __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1331     } else if (source.IsConstant()) {
1332       HConstant* constant = source.GetConstant();
1333       int32_t value = GetInt32ValueOf(constant);
1334       __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
1335     } else {
1336       DCHECK(source.IsStackSlot());
1337       __ pushl(Address(ESP, source.GetStackIndex()));
1338       __ popl(Address(ESP, destination.GetStackIndex()));
1339     }
1340   }
1341 }
1342 
Move64(Location destination,Location source)1343 void CodeGeneratorX86::Move64(Location destination, Location source) {
1344   if (source.Equals(destination)) {
1345     return;
1346   }
1347   if (destination.IsRegisterPair()) {
1348     if (source.IsRegisterPair()) {
1349       EmitParallelMoves(
1350           Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
1351           Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
1352           DataType::Type::kInt32,
1353           Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
1354           Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
1355           DataType::Type::kInt32);
1356     } else if (source.IsFpuRegister()) {
1357       XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
1358       __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
1359       __ psrlq(src_reg, Immediate(32));
1360       __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
1361     } else {
1362       // No conflict possible, so just do the moves.
1363       DCHECK(source.IsDoubleStackSlot());
1364       __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
1365       __ movl(destination.AsRegisterPairHigh<Register>(),
1366               Address(ESP, source.GetHighStackIndex(kX86WordSize)));
1367     }
1368   } else if (destination.IsFpuRegister()) {
1369     if (source.IsFpuRegister()) {
1370       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
1371     } else if (source.IsDoubleStackSlot()) {
1372       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
1373     } else if (source.IsRegisterPair()) {
1374       size_t elem_size = DataType::Size(DataType::Type::kInt32);
1375       // Create stack space for 2 elements.
1376       __ subl(ESP, Immediate(2 * elem_size));
1377       __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
1378       __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
1379       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
1380       // And remove the temporary stack space we allocated.
1381       __ addl(ESP, Immediate(2 * elem_size));
1382     } else {
1383       LOG(FATAL) << "Unimplemented";
1384     }
1385   } else {
1386     DCHECK(destination.IsDoubleStackSlot()) << destination;
1387     if (source.IsRegisterPair()) {
1388       // No conflict possible, so just do the moves.
1389       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
1390       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1391               source.AsRegisterPairHigh<Register>());
1392     } else if (source.IsFpuRegister()) {
1393       __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
1394     } else if (source.IsConstant()) {
1395       HConstant* constant = source.GetConstant();
1396       DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1397       int64_t value = GetInt64ValueOf(constant);
1398       __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
1399       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
1400               Immediate(High32Bits(value)));
1401     } else {
1402       DCHECK(source.IsDoubleStackSlot()) << source;
1403       EmitParallelMoves(
1404           Location::StackSlot(source.GetStackIndex()),
1405           Location::StackSlot(destination.GetStackIndex()),
1406           DataType::Type::kInt32,
1407           Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
1408           Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
1409           DataType::Type::kInt32);
1410     }
1411   }
1412 }
1413 
MoveConstant(Location location,int32_t value)1414 void CodeGeneratorX86::MoveConstant(Location location, int32_t value) {
1415   DCHECK(location.IsRegister());
1416   __ movl(location.AsRegister<Register>(), Immediate(value));
1417 }
1418 
MoveLocation(Location dst,Location src,DataType::Type dst_type)1419 void CodeGeneratorX86::MoveLocation(Location dst, Location src, DataType::Type dst_type) {
1420   HParallelMove move(GetGraph()->GetAllocator());
1421   if (dst_type == DataType::Type::kInt64 && !src.IsConstant() && !src.IsFpuRegister()) {
1422     move.AddMove(src.ToLow(), dst.ToLow(), DataType::Type::kInt32, nullptr);
1423     move.AddMove(src.ToHigh(), dst.ToHigh(), DataType::Type::kInt32, nullptr);
1424   } else {
1425     move.AddMove(src, dst, dst_type, nullptr);
1426   }
1427   GetMoveResolver()->EmitNativeCode(&move);
1428 }
1429 
AddLocationAsTemp(Location location,LocationSummary * locations)1430 void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) {
1431   if (location.IsRegister()) {
1432     locations->AddTemp(location);
1433   } else if (location.IsRegisterPair()) {
1434     locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>()));
1435     locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>()));
1436   } else {
1437     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1438   }
1439 }
1440 
HandleGoto(HInstruction * got,HBasicBlock * successor)1441 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1442   if (successor->IsExitBlock()) {
1443     DCHECK(got->GetPrevious()->AlwaysThrows());
1444     return;  // no code needed
1445   }
1446 
1447   HBasicBlock* block = got->GetBlock();
1448   HInstruction* previous = got->GetPrevious();
1449 
1450   HLoopInformation* info = block->GetLoopInformation();
1451   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1452     codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
1453     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1454     return;
1455   }
1456 
1457   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1458     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1459   }
1460   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1461     __ jmp(codegen_->GetLabelOf(successor));
1462   }
1463 }
1464 
VisitGoto(HGoto * got)1465 void LocationsBuilderX86::VisitGoto(HGoto* got) {
1466   got->SetLocations(nullptr);
1467 }
1468 
VisitGoto(HGoto * got)1469 void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
1470   HandleGoto(got, got->GetSuccessor());
1471 }
1472 
VisitTryBoundary(HTryBoundary * try_boundary)1473 void LocationsBuilderX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1474   try_boundary->SetLocations(nullptr);
1475 }
1476 
VisitTryBoundary(HTryBoundary * try_boundary)1477 void InstructionCodeGeneratorX86::VisitTryBoundary(HTryBoundary* try_boundary) {
1478   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1479   if (!successor->IsExitBlock()) {
1480     HandleGoto(try_boundary, successor);
1481   }
1482 }
1483 
VisitExit(HExit * exit)1484 void LocationsBuilderX86::VisitExit(HExit* exit) {
1485   exit->SetLocations(nullptr);
1486 }
1487 
VisitExit(HExit * exit ATTRIBUTE_UNUSED)1488 void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1489 }
1490 
1491 template<class LabelType>
GenerateFPJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1492 void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
1493                                                   LabelType* true_label,
1494                                                   LabelType* false_label) {
1495   if (cond->IsFPConditionTrueIfNaN()) {
1496     __ j(kUnordered, true_label);
1497   } else if (cond->IsFPConditionFalseIfNaN()) {
1498     __ j(kUnordered, false_label);
1499   }
1500   __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label);
1501 }
1502 
1503 template<class LabelType>
GenerateLongComparesAndJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1504 void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
1505                                                                LabelType* true_label,
1506                                                                LabelType* false_label) {
1507   LocationSummary* locations = cond->GetLocations();
1508   Location left = locations->InAt(0);
1509   Location right = locations->InAt(1);
1510   IfCondition if_cond = cond->GetCondition();
1511 
1512   Register left_high = left.AsRegisterPairHigh<Register>();
1513   Register left_low = left.AsRegisterPairLow<Register>();
1514   IfCondition true_high_cond = if_cond;
1515   IfCondition false_high_cond = cond->GetOppositeCondition();
1516   Condition final_condition = X86UnsignedOrFPCondition(if_cond);  // unsigned on lower part
1517 
1518   // Set the conditions for the test, remembering that == needs to be
1519   // decided using the low words.
1520   switch (if_cond) {
1521     case kCondEQ:
1522     case kCondNE:
1523       // Nothing to do.
1524       break;
1525     case kCondLT:
1526       false_high_cond = kCondGT;
1527       break;
1528     case kCondLE:
1529       true_high_cond = kCondLT;
1530       break;
1531     case kCondGT:
1532       false_high_cond = kCondLT;
1533       break;
1534     case kCondGE:
1535       true_high_cond = kCondGT;
1536       break;
1537     case kCondB:
1538       false_high_cond = kCondA;
1539       break;
1540     case kCondBE:
1541       true_high_cond = kCondB;
1542       break;
1543     case kCondA:
1544       false_high_cond = kCondB;
1545       break;
1546     case kCondAE:
1547       true_high_cond = kCondA;
1548       break;
1549   }
1550 
1551   if (right.IsConstant()) {
1552     int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1553     int32_t val_high = High32Bits(value);
1554     int32_t val_low = Low32Bits(value);
1555 
1556     codegen_->Compare32BitValue(left_high, val_high);
1557     if (if_cond == kCondNE) {
1558       __ j(X86Condition(true_high_cond), true_label);
1559     } else if (if_cond == kCondEQ) {
1560       __ j(X86Condition(false_high_cond), false_label);
1561     } else {
1562       __ j(X86Condition(true_high_cond), true_label);
1563       __ j(X86Condition(false_high_cond), false_label);
1564     }
1565     // Must be equal high, so compare the lows.
1566     codegen_->Compare32BitValue(left_low, val_low);
1567   } else if (right.IsRegisterPair()) {
1568     Register right_high = right.AsRegisterPairHigh<Register>();
1569     Register right_low = right.AsRegisterPairLow<Register>();
1570 
1571     __ cmpl(left_high, right_high);
1572     if (if_cond == kCondNE) {
1573       __ j(X86Condition(true_high_cond), true_label);
1574     } else if (if_cond == kCondEQ) {
1575       __ j(X86Condition(false_high_cond), false_label);
1576     } else {
1577       __ j(X86Condition(true_high_cond), true_label);
1578       __ j(X86Condition(false_high_cond), false_label);
1579     }
1580     // Must be equal high, so compare the lows.
1581     __ cmpl(left_low, right_low);
1582   } else {
1583     DCHECK(right.IsDoubleStackSlot());
1584     __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
1585     if (if_cond == kCondNE) {
1586       __ j(X86Condition(true_high_cond), true_label);
1587     } else if (if_cond == kCondEQ) {
1588       __ j(X86Condition(false_high_cond), false_label);
1589     } else {
1590       __ j(X86Condition(true_high_cond), true_label);
1591       __ j(X86Condition(false_high_cond), false_label);
1592     }
1593     // Must be equal high, so compare the lows.
1594     __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
1595   }
1596   // The last comparison might be unsigned.
1597   __ j(final_condition, true_label);
1598 }
1599 
GenerateFPCompare(Location lhs,Location rhs,HInstruction * insn,bool is_double)1600 void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
1601                                                     Location rhs,
1602                                                     HInstruction* insn,
1603                                                     bool is_double) {
1604   HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable();
1605   if (is_double) {
1606     if (rhs.IsFpuRegister()) {
1607       __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1608     } else if (const_area != nullptr) {
1609       DCHECK(const_area->IsEmittedAtUseSite());
1610       __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
1611                  codegen_->LiteralDoubleAddress(
1612                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
1613                      const_area->GetBaseMethodAddress(),
1614                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1615     } else {
1616       DCHECK(rhs.IsDoubleStackSlot());
1617       __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1618     }
1619   } else {
1620     if (rhs.IsFpuRegister()) {
1621       __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
1622     } else if (const_area != nullptr) {
1623       DCHECK(const_area->IsEmittedAtUseSite());
1624       __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
1625                  codegen_->LiteralFloatAddress(
1626                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
1627                      const_area->GetBaseMethodAddress(),
1628                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
1629     } else {
1630       DCHECK(rhs.IsStackSlot());
1631       __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
1632     }
1633   }
1634 }
1635 
1636 template<class LabelType>
GenerateCompareTestAndBranch(HCondition * condition,LabelType * true_target_in,LabelType * false_target_in)1637 void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
1638                                                                LabelType* true_target_in,
1639                                                                LabelType* false_target_in) {
1640   // Generated branching requires both targets to be explicit. If either of the
1641   // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1642   LabelType fallthrough_target;
1643   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1644   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1645 
1646   LocationSummary* locations = condition->GetLocations();
1647   Location left = locations->InAt(0);
1648   Location right = locations->InAt(1);
1649 
1650   DataType::Type type = condition->InputAt(0)->GetType();
1651   switch (type) {
1652     case DataType::Type::kInt64:
1653       GenerateLongComparesAndJumps(condition, true_target, false_target);
1654       break;
1655     case DataType::Type::kFloat32:
1656       GenerateFPCompare(left, right, condition, false);
1657       GenerateFPJumps(condition, true_target, false_target);
1658       break;
1659     case DataType::Type::kFloat64:
1660       GenerateFPCompare(left, right, condition, true);
1661       GenerateFPJumps(condition, true_target, false_target);
1662       break;
1663     default:
1664       LOG(FATAL) << "Unexpected compare type " << type;
1665   }
1666 
1667   if (false_target != &fallthrough_target) {
1668     __ jmp(false_target);
1669   }
1670 
1671   if (fallthrough_target.IsLinked()) {
1672     __ Bind(&fallthrough_target);
1673   }
1674 }
1675 
AreEflagsSetFrom(HInstruction * cond,HInstruction * branch)1676 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1677   // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
1678   // are set only strictly before `branch`. We can't use the eflags on long/FP
1679   // conditions if they are materialized due to the complex branching.
1680   return cond->IsCondition() &&
1681          cond->GetNext() == branch &&
1682          cond->InputAt(0)->GetType() != DataType::Type::kInt64 &&
1683          !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
1684 }
1685 
1686 template<class LabelType>
GenerateTestAndBranch(HInstruction * instruction,size_t condition_input_index,LabelType * true_target,LabelType * false_target)1687 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
1688                                                         size_t condition_input_index,
1689                                                         LabelType* true_target,
1690                                                         LabelType* false_target) {
1691   HInstruction* cond = instruction->InputAt(condition_input_index);
1692 
1693   if (true_target == nullptr && false_target == nullptr) {
1694     // Nothing to do. The code always falls through.
1695     return;
1696   } else if (cond->IsIntConstant()) {
1697     // Constant condition, statically compared against "true" (integer value 1).
1698     if (cond->AsIntConstant()->IsTrue()) {
1699       if (true_target != nullptr) {
1700         __ jmp(true_target);
1701       }
1702     } else {
1703       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1704       if (false_target != nullptr) {
1705         __ jmp(false_target);
1706       }
1707     }
1708     return;
1709   }
1710 
1711   // The following code generates these patterns:
1712   //  (1) true_target == nullptr && false_target != nullptr
1713   //        - opposite condition true => branch to false_target
1714   //  (2) true_target != nullptr && false_target == nullptr
1715   //        - condition true => branch to true_target
1716   //  (3) true_target != nullptr && false_target != nullptr
1717   //        - condition true => branch to true_target
1718   //        - branch to false_target
1719   if (IsBooleanValueOrMaterializedCondition(cond)) {
1720     if (AreEflagsSetFrom(cond, instruction)) {
1721       if (true_target == nullptr) {
1722         __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
1723       } else {
1724         __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
1725       }
1726     } else {
1727       // Materialized condition, compare against 0.
1728       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1729       if (lhs.IsRegister()) {
1730         __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
1731       } else {
1732         __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
1733       }
1734       if (true_target == nullptr) {
1735         __ j(kEqual, false_target);
1736       } else {
1737         __ j(kNotEqual, true_target);
1738       }
1739     }
1740   } else {
1741     // Condition has not been materialized, use its inputs as the comparison and
1742     // its condition as the branch condition.
1743     HCondition* condition = cond->AsCondition();
1744 
1745     // If this is a long or FP comparison that has been folded into
1746     // the HCondition, generate the comparison directly.
1747     DataType::Type type = condition->InputAt(0)->GetType();
1748     if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
1749       GenerateCompareTestAndBranch(condition, true_target, false_target);
1750       return;
1751     }
1752 
1753     Location lhs = condition->GetLocations()->InAt(0);
1754     Location rhs = condition->GetLocations()->InAt(1);
1755     // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
1756     codegen_->GenerateIntCompare(lhs, rhs);
1757     if (true_target == nullptr) {
1758       __ j(X86Condition(condition->GetOppositeCondition()), false_target);
1759     } else {
1760       __ j(X86Condition(condition->GetCondition()), true_target);
1761     }
1762   }
1763 
1764   // If neither branch falls through (case 3), the conditional branch to `true_target`
1765   // was already emitted (case 2) and we need to emit a jump to `false_target`.
1766   if (true_target != nullptr && false_target != nullptr) {
1767     __ jmp(false_target);
1768   }
1769 }
1770 
VisitIf(HIf * if_instr)1771 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
1772   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
1773   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1774     locations->SetInAt(0, Location::Any());
1775   }
1776 }
1777 
VisitIf(HIf * if_instr)1778 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
1779   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1780   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1781   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1782       nullptr : codegen_->GetLabelOf(true_successor);
1783   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1784       nullptr : codegen_->GetLabelOf(false_successor);
1785   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
1786 }
1787 
VisitDeoptimize(HDeoptimize * deoptimize)1788 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
1789   LocationSummary* locations = new (GetGraph()->GetAllocator())
1790       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1791   InvokeRuntimeCallingConvention calling_convention;
1792   RegisterSet caller_saves = RegisterSet::Empty();
1793   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1794   locations->SetCustomSlowPathCallerSaves(caller_saves);
1795   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1796     locations->SetInAt(0, Location::Any());
1797   }
1798 }
1799 
VisitDeoptimize(HDeoptimize * deoptimize)1800 void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
1801   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
1802   GenerateTestAndBranch<Label>(deoptimize,
1803                                /* condition_input_index= */ 0,
1804                                slow_path->GetEntryLabel(),
1805                                /* false_target= */ nullptr);
1806 }
1807 
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)1808 void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1809   LocationSummary* locations = new (GetGraph()->GetAllocator())
1810       LocationSummary(flag, LocationSummary::kNoCall);
1811   locations->SetOut(Location::RequiresRegister());
1812 }
1813 
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)1814 void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1815   __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
1816           Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1817 }
1818 
SelectCanUseCMOV(HSelect * select)1819 static bool SelectCanUseCMOV(HSelect* select) {
1820   // There are no conditional move instructions for XMMs.
1821   if (DataType::IsFloatingPointType(select->GetType())) {
1822     return false;
1823   }
1824 
1825   // A FP condition doesn't generate the single CC that we need.
1826   // In 32 bit mode, a long condition doesn't generate a single CC either.
1827   HInstruction* condition = select->GetCondition();
1828   if (condition->IsCondition()) {
1829     DataType::Type compare_type = condition->InputAt(0)->GetType();
1830     if (compare_type == DataType::Type::kInt64 ||
1831         DataType::IsFloatingPointType(compare_type)) {
1832       return false;
1833     }
1834   }
1835 
1836   // We can generate a CMOV for this Select.
1837   return true;
1838 }
1839 
VisitSelect(HSelect * select)1840 void LocationsBuilderX86::VisitSelect(HSelect* select) {
1841   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
1842   if (DataType::IsFloatingPointType(select->GetType())) {
1843     locations->SetInAt(0, Location::RequiresFpuRegister());
1844     locations->SetInAt(1, Location::Any());
1845   } else {
1846     locations->SetInAt(0, Location::RequiresRegister());
1847     if (SelectCanUseCMOV(select)) {
1848       if (select->InputAt(1)->IsConstant()) {
1849         // Cmov can't handle a constant value.
1850         locations->SetInAt(1, Location::RequiresRegister());
1851       } else {
1852         locations->SetInAt(1, Location::Any());
1853       }
1854     } else {
1855       locations->SetInAt(1, Location::Any());
1856     }
1857   }
1858   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1859     locations->SetInAt(2, Location::RequiresRegister());
1860   }
1861   locations->SetOut(Location::SameAsFirstInput());
1862 }
1863 
VisitSelect(HSelect * select)1864 void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
1865   LocationSummary* locations = select->GetLocations();
1866   DCHECK(locations->InAt(0).Equals(locations->Out()));
1867   if (SelectCanUseCMOV(select)) {
1868     // If both the condition and the source types are integer, we can generate
1869     // a CMOV to implement Select.
1870 
1871     HInstruction* select_condition = select->GetCondition();
1872     Condition cond = kNotEqual;
1873 
1874     // Figure out how to test the 'condition'.
1875     if (select_condition->IsCondition()) {
1876       HCondition* condition = select_condition->AsCondition();
1877       if (!condition->IsEmittedAtUseSite()) {
1878         // This was a previously materialized condition.
1879         // Can we use the existing condition code?
1880         if (AreEflagsSetFrom(condition, select)) {
1881           // Materialization was the previous instruction. Condition codes are right.
1882           cond = X86Condition(condition->GetCondition());
1883         } else {
1884           // No, we have to recreate the condition code.
1885           Register cond_reg = locations->InAt(2).AsRegister<Register>();
1886           __ testl(cond_reg, cond_reg);
1887         }
1888       } else {
1889         // We can't handle FP or long here.
1890         DCHECK_NE(condition->InputAt(0)->GetType(), DataType::Type::kInt64);
1891         DCHECK(!DataType::IsFloatingPointType(condition->InputAt(0)->GetType()));
1892         LocationSummary* cond_locations = condition->GetLocations();
1893         codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
1894         cond = X86Condition(condition->GetCondition());
1895       }
1896     } else {
1897       // Must be a Boolean condition, which needs to be compared to 0.
1898       Register cond_reg = locations->InAt(2).AsRegister<Register>();
1899       __ testl(cond_reg, cond_reg);
1900     }
1901 
1902     // If the condition is true, overwrite the output, which already contains false.
1903     Location false_loc = locations->InAt(0);
1904     Location true_loc = locations->InAt(1);
1905     if (select->GetType() == DataType::Type::kInt64) {
1906       // 64 bit conditional move.
1907       Register false_high = false_loc.AsRegisterPairHigh<Register>();
1908       Register false_low = false_loc.AsRegisterPairLow<Register>();
1909       if (true_loc.IsRegisterPair()) {
1910         __ cmovl(cond, false_high, true_loc.AsRegisterPairHigh<Register>());
1911         __ cmovl(cond, false_low, true_loc.AsRegisterPairLow<Register>());
1912       } else {
1913         __ cmovl(cond, false_high, Address(ESP, true_loc.GetHighStackIndex(kX86WordSize)));
1914         __ cmovl(cond, false_low, Address(ESP, true_loc.GetStackIndex()));
1915       }
1916     } else {
1917       // 32 bit conditional move.
1918       Register false_reg = false_loc.AsRegister<Register>();
1919       if (true_loc.IsRegister()) {
1920         __ cmovl(cond, false_reg, true_loc.AsRegister<Register>());
1921       } else {
1922         __ cmovl(cond, false_reg, Address(ESP, true_loc.GetStackIndex()));
1923       }
1924     }
1925   } else {
1926     NearLabel false_target;
1927     GenerateTestAndBranch<NearLabel>(
1928         select, /* condition_input_index= */ 2, /* true_target= */ nullptr, &false_target);
1929     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1930     __ Bind(&false_target);
1931   }
1932 }
1933 
VisitNativeDebugInfo(HNativeDebugInfo * info)1934 void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1935   new (GetGraph()->GetAllocator()) LocationSummary(info);
1936 }
1937 
VisitNativeDebugInfo(HNativeDebugInfo *)1938 void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo*) {
1939   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1940 }
1941 
GenerateNop()1942 void CodeGeneratorX86::GenerateNop() {
1943   __ nop();
1944 }
1945 
HandleCondition(HCondition * cond)1946 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
1947   LocationSummary* locations =
1948       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
1949   // Handle the long/FP comparisons made in instruction simplification.
1950   switch (cond->InputAt(0)->GetType()) {
1951     case DataType::Type::kInt64: {
1952       locations->SetInAt(0, Location::RequiresRegister());
1953       locations->SetInAt(1, Location::Any());
1954       if (!cond->IsEmittedAtUseSite()) {
1955         locations->SetOut(Location::RequiresRegister());
1956       }
1957       break;
1958     }
1959     case DataType::Type::kFloat32:
1960     case DataType::Type::kFloat64: {
1961       locations->SetInAt(0, Location::RequiresFpuRegister());
1962       if (cond->InputAt(1)->IsX86LoadFromConstantTable()) {
1963         DCHECK(cond->InputAt(1)->IsEmittedAtUseSite());
1964       } else if (cond->InputAt(1)->IsConstant()) {
1965         locations->SetInAt(1, Location::RequiresFpuRegister());
1966       } else {
1967         locations->SetInAt(1, Location::Any());
1968       }
1969       if (!cond->IsEmittedAtUseSite()) {
1970         locations->SetOut(Location::RequiresRegister());
1971       }
1972       break;
1973     }
1974     default:
1975       locations->SetInAt(0, Location::RequiresRegister());
1976       locations->SetInAt(1, Location::Any());
1977       if (!cond->IsEmittedAtUseSite()) {
1978         // We need a byte register.
1979         locations->SetOut(Location::RegisterLocation(ECX));
1980       }
1981       break;
1982   }
1983 }
1984 
HandleCondition(HCondition * cond)1985 void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
1986   if (cond->IsEmittedAtUseSite()) {
1987     return;
1988   }
1989 
1990   LocationSummary* locations = cond->GetLocations();
1991   Location lhs = locations->InAt(0);
1992   Location rhs = locations->InAt(1);
1993   Register reg = locations->Out().AsRegister<Register>();
1994   NearLabel true_label, false_label;
1995 
1996   switch (cond->InputAt(0)->GetType()) {
1997     default: {
1998       // Integer case.
1999 
2000       // Clear output register: setb only sets the low byte.
2001       __ xorl(reg, reg);
2002       codegen_->GenerateIntCompare(lhs, rhs);
2003       __ setb(X86Condition(cond->GetCondition()), reg);
2004       return;
2005     }
2006     case DataType::Type::kInt64:
2007       GenerateLongComparesAndJumps(cond, &true_label, &false_label);
2008       break;
2009     case DataType::Type::kFloat32:
2010       GenerateFPCompare(lhs, rhs, cond, false);
2011       GenerateFPJumps(cond, &true_label, &false_label);
2012       break;
2013     case DataType::Type::kFloat64:
2014       GenerateFPCompare(lhs, rhs, cond, true);
2015       GenerateFPJumps(cond, &true_label, &false_label);
2016       break;
2017   }
2018 
2019   // Convert the jumps into the result.
2020   NearLabel done_label;
2021 
2022   // False case: result = 0.
2023   __ Bind(&false_label);
2024   __ xorl(reg, reg);
2025   __ jmp(&done_label);
2026 
2027   // True case: result = 1.
2028   __ Bind(&true_label);
2029   __ movl(reg, Immediate(1));
2030   __ Bind(&done_label);
2031 }
2032 
VisitEqual(HEqual * comp)2033 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
2034   HandleCondition(comp);
2035 }
2036 
VisitEqual(HEqual * comp)2037 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
2038   HandleCondition(comp);
2039 }
2040 
VisitNotEqual(HNotEqual * comp)2041 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
2042   HandleCondition(comp);
2043 }
2044 
VisitNotEqual(HNotEqual * comp)2045 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
2046   HandleCondition(comp);
2047 }
2048 
VisitLessThan(HLessThan * comp)2049 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
2050   HandleCondition(comp);
2051 }
2052 
VisitLessThan(HLessThan * comp)2053 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
2054   HandleCondition(comp);
2055 }
2056 
VisitLessThanOrEqual(HLessThanOrEqual * comp)2057 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2058   HandleCondition(comp);
2059 }
2060 
VisitLessThanOrEqual(HLessThanOrEqual * comp)2061 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2062   HandleCondition(comp);
2063 }
2064 
VisitGreaterThan(HGreaterThan * comp)2065 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
2066   HandleCondition(comp);
2067 }
2068 
VisitGreaterThan(HGreaterThan * comp)2069 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
2070   HandleCondition(comp);
2071 }
2072 
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)2073 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2074   HandleCondition(comp);
2075 }
2076 
VisitGreaterThanOrEqual(HGreaterThanOrEqual * comp)2077 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2078   HandleCondition(comp);
2079 }
2080 
VisitBelow(HBelow * comp)2081 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
2082   HandleCondition(comp);
2083 }
2084 
VisitBelow(HBelow * comp)2085 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
2086   HandleCondition(comp);
2087 }
2088 
VisitBelowOrEqual(HBelowOrEqual * comp)2089 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2090   HandleCondition(comp);
2091 }
2092 
VisitBelowOrEqual(HBelowOrEqual * comp)2093 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
2094   HandleCondition(comp);
2095 }
2096 
VisitAbove(HAbove * comp)2097 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
2098   HandleCondition(comp);
2099 }
2100 
VisitAbove(HAbove * comp)2101 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
2102   HandleCondition(comp);
2103 }
2104 
VisitAboveOrEqual(HAboveOrEqual * comp)2105 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2106   HandleCondition(comp);
2107 }
2108 
VisitAboveOrEqual(HAboveOrEqual * comp)2109 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
2110   HandleCondition(comp);
2111 }
2112 
VisitIntConstant(HIntConstant * constant)2113 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
2114   LocationSummary* locations =
2115       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2116   locations->SetOut(Location::ConstantLocation(constant));
2117 }
2118 
VisitIntConstant(HIntConstant * constant ATTRIBUTE_UNUSED)2119 void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2120   // Will be generated at use site.
2121 }
2122 
VisitNullConstant(HNullConstant * constant)2123 void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
2124   LocationSummary* locations =
2125       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2126   locations->SetOut(Location::ConstantLocation(constant));
2127 }
2128 
VisitNullConstant(HNullConstant * constant ATTRIBUTE_UNUSED)2129 void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2130   // Will be generated at use site.
2131 }
2132 
VisitLongConstant(HLongConstant * constant)2133 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
2134   LocationSummary* locations =
2135       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2136   locations->SetOut(Location::ConstantLocation(constant));
2137 }
2138 
VisitLongConstant(HLongConstant * constant ATTRIBUTE_UNUSED)2139 void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2140   // Will be generated at use site.
2141 }
2142 
VisitFloatConstant(HFloatConstant * constant)2143 void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
2144   LocationSummary* locations =
2145       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2146   locations->SetOut(Location::ConstantLocation(constant));
2147 }
2148 
VisitFloatConstant(HFloatConstant * constant ATTRIBUTE_UNUSED)2149 void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2150   // Will be generated at use site.
2151 }
2152 
VisitDoubleConstant(HDoubleConstant * constant)2153 void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
2154   LocationSummary* locations =
2155       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2156   locations->SetOut(Location::ConstantLocation(constant));
2157 }
2158 
VisitDoubleConstant(HDoubleConstant * constant ATTRIBUTE_UNUSED)2159 void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2160   // Will be generated at use site.
2161 }
2162 
VisitConstructorFence(HConstructorFence * constructor_fence)2163 void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
2164   constructor_fence->SetLocations(nullptr);
2165 }
2166 
VisitConstructorFence(HConstructorFence * constructor_fence ATTRIBUTE_UNUSED)2167 void InstructionCodeGeneratorX86::VisitConstructorFence(
2168     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2169   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2170 }
2171 
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)2172 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2173   memory_barrier->SetLocations(nullptr);
2174 }
2175 
VisitMemoryBarrier(HMemoryBarrier * memory_barrier)2176 void InstructionCodeGeneratorX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2177   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2178 }
2179 
VisitReturnVoid(HReturnVoid * ret)2180 void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
2181   ret->SetLocations(nullptr);
2182 }
2183 
VisitReturnVoid(HReturnVoid * ret ATTRIBUTE_UNUSED)2184 void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2185   codegen_->GenerateFrameExit();
2186 }
2187 
VisitReturn(HReturn * ret)2188 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
2189   LocationSummary* locations =
2190       new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2191   switch (ret->InputAt(0)->GetType()) {
2192     case DataType::Type::kReference:
2193     case DataType::Type::kBool:
2194     case DataType::Type::kUint8:
2195     case DataType::Type::kInt8:
2196     case DataType::Type::kUint16:
2197     case DataType::Type::kInt16:
2198     case DataType::Type::kInt32:
2199       locations->SetInAt(0, Location::RegisterLocation(EAX));
2200       break;
2201 
2202     case DataType::Type::kInt64:
2203       locations->SetInAt(
2204           0, Location::RegisterPairLocation(EAX, EDX));
2205       break;
2206 
2207     case DataType::Type::kFloat32:
2208     case DataType::Type::kFloat64:
2209       locations->SetInAt(
2210           0, Location::FpuRegisterLocation(XMM0));
2211       break;
2212 
2213     default:
2214       LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2215   }
2216 }
2217 
VisitReturn(HReturn * ret)2218 void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
2219   switch (ret->InputAt(0)->GetType()) {
2220     case DataType::Type::kReference:
2221     case DataType::Type::kBool:
2222     case DataType::Type::kUint8:
2223     case DataType::Type::kInt8:
2224     case DataType::Type::kUint16:
2225     case DataType::Type::kInt16:
2226     case DataType::Type::kInt32:
2227       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
2228       break;
2229 
2230     case DataType::Type::kInt64:
2231       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX);
2232       DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX);
2233       break;
2234 
2235     case DataType::Type::kFloat32:
2236       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2237       if (GetGraph()->IsCompilingOsr()) {
2238         // To simplify callers of an OSR method, we put the return value in both
2239         // floating point and core registers.
2240         __ movd(EAX, XMM0);
2241       }
2242       break;
2243 
2244     case DataType::Type::kFloat64:
2245       DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
2246       if (GetGraph()->IsCompilingOsr()) {
2247         // To simplify callers of an OSR method, we put the return value in both
2248         // floating point and core registers.
2249         __ movd(EAX, XMM0);
2250         // Use XMM1 as temporary register to not clobber XMM0.
2251         __ movaps(XMM1, XMM0);
2252         __ psrlq(XMM1, Immediate(32));
2253         __ movd(EDX, XMM1);
2254       }
2255       break;
2256 
2257     default:
2258       LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
2259   }
2260   codegen_->GenerateFrameExit();
2261 }
2262 
VisitInvokeUnresolved(HInvokeUnresolved * invoke)2263 void LocationsBuilderX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2264   // The trampoline uses the same calling convention as dex calling conventions,
2265   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2266   // the method_idx.
2267   HandleInvoke(invoke);
2268 }
2269 
VisitInvokeUnresolved(HInvokeUnresolved * invoke)2270 void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2271   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2272 }
2273 
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)2274 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2275   // Explicit clinit checks triggered by static invokes must have been pruned by
2276   // art::PrepareForRegisterAllocation.
2277   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2278 
2279   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2280   if (intrinsic.TryDispatch(invoke)) {
2281     if (invoke->GetLocations()->CanCall() &&
2282         invoke->HasPcRelativeMethodLoadKind() &&
2283         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) {
2284       invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
2285     }
2286     return;
2287   }
2288 
2289   HandleInvoke(invoke);
2290 
2291   // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
2292   if (invoke->HasPcRelativeMethodLoadKind()) {
2293     invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
2294   }
2295 }
2296 
TryGenerateIntrinsicCode(HInvoke * invoke,CodeGeneratorX86 * codegen)2297 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
2298   if (invoke->GetLocations()->Intrinsified()) {
2299     IntrinsicCodeGeneratorX86 intrinsic(codegen);
2300     intrinsic.Dispatch(invoke);
2301     return true;
2302   }
2303   return false;
2304 }
2305 
VisitInvokeStaticOrDirect(HInvokeStaticOrDirect * invoke)2306 void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2307   // Explicit clinit checks triggered by static invokes must have been pruned by
2308   // art::PrepareForRegisterAllocation.
2309   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2310 
2311   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2312     return;
2313   }
2314 
2315   LocationSummary* locations = invoke->GetLocations();
2316   codegen_->GenerateStaticOrDirectCall(
2317       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2318 }
2319 
VisitInvokeVirtual(HInvokeVirtual * invoke)2320 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2321   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
2322   if (intrinsic.TryDispatch(invoke)) {
2323     return;
2324   }
2325 
2326   HandleInvoke(invoke);
2327 
2328   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2329     // Add one temporary for inline cache update.
2330     invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2331   }
2332 }
2333 
HandleInvoke(HInvoke * invoke)2334 void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
2335   InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
2336   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2337 }
2338 
VisitInvokeVirtual(HInvokeVirtual * invoke)2339 void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2340   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2341     return;
2342   }
2343 
2344   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2345   DCHECK(!codegen_->IsLeafMethod());
2346 }
2347 
VisitInvokeInterface(HInvokeInterface * invoke)2348 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2349   // This call to HandleInvoke allocates a temporary (core) register
2350   // which is also used to transfer the hidden argument from FP to
2351   // core register.
2352   HandleInvoke(invoke);
2353   // Add the hidden argument.
2354   invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
2355 
2356   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
2357     // Add one temporary for inline cache update.
2358     invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
2359   }
2360 }
2361 
MaybeGenerateInlineCacheCheck(HInstruction * instruction,Register klass)2362 void CodeGeneratorX86::MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass) {
2363   DCHECK_EQ(EAX, klass);
2364   // We know the destination of an intrinsic, so no need to record inline
2365   // caches (also the intrinsic location builder doesn't request an additional
2366   // temporary).
2367   if (!instruction->GetLocations()->Intrinsified() &&
2368       GetGraph()->IsCompilingBaseline() &&
2369       !Runtime::Current()->IsAotCompiler()) {
2370     DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
2371     ScopedObjectAccess soa(Thread::Current());
2372     ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
2373     if (info != nullptr) {
2374       InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
2375       uint32_t address = reinterpret_cast32<uint32_t>(cache);
2376       if (kIsDebugBuild) {
2377         uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u;
2378         CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>());
2379       }
2380       Register temp = EBP;
2381       NearLabel done;
2382       __ movl(temp, Immediate(address));
2383       // Fast path for a monomorphic cache.
2384       __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value()));
2385       __ j(kEqual, &done);
2386       GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value());
2387       __ Bind(&done);
2388     }
2389   }
2390 }
2391 
VisitInvokeInterface(HInvokeInterface * invoke)2392 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
2393   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2394   LocationSummary* locations = invoke->GetLocations();
2395   Register temp = locations->GetTemp(0).AsRegister<Register>();
2396   XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2397   Location receiver = locations->InAt(0);
2398   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2399 
2400   // Set the hidden argument. This is safe to do this here, as XMM7
2401   // won't be modified thereafter, before the `call` instruction.
2402   DCHECK_EQ(XMM7, hidden_reg);
2403   __ movl(temp, Immediate(invoke->GetDexMethodIndex()));
2404   __ movd(hidden_reg, temp);
2405 
2406   if (receiver.IsStackSlot()) {
2407     __ movl(temp, Address(ESP, receiver.GetStackIndex()));
2408     // /* HeapReference<Class> */ temp = temp->klass_
2409     __ movl(temp, Address(temp, class_offset));
2410   } else {
2411     // /* HeapReference<Class> */ temp = receiver->klass_
2412     __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
2413   }
2414   codegen_->MaybeRecordImplicitNullCheck(invoke);
2415   // Instead of simply (possibly) unpoisoning `temp` here, we should
2416   // emit a read barrier for the previous class reference load.
2417   // However this is not required in practice, as this is an
2418   // intermediate/temporary reference and because the current
2419   // concurrent copying collector keeps the from-space memory
2420   // intact/accessible until the end of the marking phase (the
2421   // concurrent copying collector may not in the future).
2422   __ MaybeUnpoisonHeapReference(temp);
2423 
2424   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2425 
2426   // temp = temp->GetAddressOfIMT()
2427   __ movl(temp,
2428       Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
2429   // temp = temp->GetImtEntryAt(method_offset);
2430   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2431       invoke->GetImtIndex(), kX86PointerSize));
2432   __ movl(temp, Address(temp, method_offset));
2433   // call temp->GetEntryPoint();
2434   __ call(Address(temp,
2435                   ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
2436 
2437   DCHECK(!codegen_->IsLeafMethod());
2438   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2439 }
2440 
VisitInvokePolymorphic(HInvokePolymorphic * invoke)2441 void LocationsBuilderX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2442   HandleInvoke(invoke);
2443 }
2444 
VisitInvokePolymorphic(HInvokePolymorphic * invoke)2445 void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2446   codegen_->GenerateInvokePolymorphicCall(invoke);
2447 }
2448 
VisitInvokeCustom(HInvokeCustom * invoke)2449 void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2450   HandleInvoke(invoke);
2451 }
2452 
VisitInvokeCustom(HInvokeCustom * invoke)2453 void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) {
2454   codegen_->GenerateInvokeCustomCall(invoke);
2455 }
2456 
VisitNeg(HNeg * neg)2457 void LocationsBuilderX86::VisitNeg(HNeg* neg) {
2458   LocationSummary* locations =
2459       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2460   switch (neg->GetResultType()) {
2461     case DataType::Type::kInt32:
2462     case DataType::Type::kInt64:
2463       locations->SetInAt(0, Location::RequiresRegister());
2464       locations->SetOut(Location::SameAsFirstInput());
2465       break;
2466 
2467     case DataType::Type::kFloat32:
2468       locations->SetInAt(0, Location::RequiresFpuRegister());
2469       locations->SetOut(Location::SameAsFirstInput());
2470       locations->AddTemp(Location::RequiresRegister());
2471       locations->AddTemp(Location::RequiresFpuRegister());
2472       break;
2473 
2474     case DataType::Type::kFloat64:
2475       locations->SetInAt(0, Location::RequiresFpuRegister());
2476       locations->SetOut(Location::SameAsFirstInput());
2477       locations->AddTemp(Location::RequiresFpuRegister());
2478       break;
2479 
2480     default:
2481       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2482   }
2483 }
2484 
VisitNeg(HNeg * neg)2485 void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
2486   LocationSummary* locations = neg->GetLocations();
2487   Location out = locations->Out();
2488   Location in = locations->InAt(0);
2489   switch (neg->GetResultType()) {
2490     case DataType::Type::kInt32:
2491       DCHECK(in.IsRegister());
2492       DCHECK(in.Equals(out));
2493       __ negl(out.AsRegister<Register>());
2494       break;
2495 
2496     case DataType::Type::kInt64:
2497       DCHECK(in.IsRegisterPair());
2498       DCHECK(in.Equals(out));
2499       __ negl(out.AsRegisterPairLow<Register>());
2500       // Negation is similar to subtraction from zero.  The least
2501       // significant byte triggers a borrow when it is different from
2502       // zero; to take it into account, add 1 to the most significant
2503       // byte if the carry flag (CF) is set to 1 after the first NEGL
2504       // operation.
2505       __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0));
2506       __ negl(out.AsRegisterPairHigh<Register>());
2507       break;
2508 
2509     case DataType::Type::kFloat32: {
2510       DCHECK(in.Equals(out));
2511       Register constant = locations->GetTemp(0).AsRegister<Register>();
2512       XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2513       // Implement float negation with an exclusive or with value
2514       // 0x80000000 (mask for bit 31, representing the sign of a
2515       // single-precision floating-point number).
2516       __ movl(constant, Immediate(INT32_C(0x80000000)));
2517       __ movd(mask, constant);
2518       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2519       break;
2520     }
2521 
2522     case DataType::Type::kFloat64: {
2523       DCHECK(in.Equals(out));
2524       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2525       // Implement double negation with an exclusive or with value
2526       // 0x8000000000000000 (mask for bit 63, representing the sign of
2527       // a double-precision floating-point number).
2528       __ LoadLongConstant(mask, INT64_C(0x8000000000000000));
2529       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2530       break;
2531     }
2532 
2533     default:
2534       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2535   }
2536 }
2537 
VisitX86FPNeg(HX86FPNeg * neg)2538 void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) {
2539   LocationSummary* locations =
2540       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2541   DCHECK(DataType::IsFloatingPointType(neg->GetType()));
2542   locations->SetInAt(0, Location::RequiresFpuRegister());
2543   locations->SetInAt(1, Location::RequiresRegister());
2544   locations->SetOut(Location::SameAsFirstInput());
2545   locations->AddTemp(Location::RequiresFpuRegister());
2546 }
2547 
VisitX86FPNeg(HX86FPNeg * neg)2548 void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
2549   LocationSummary* locations = neg->GetLocations();
2550   Location out = locations->Out();
2551   DCHECK(locations->InAt(0).Equals(out));
2552 
2553   Register constant_area = locations->InAt(1).AsRegister<Register>();
2554   XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2555   if (neg->GetType() == DataType::Type::kFloat32) {
2556     __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000),
2557                                                  neg->GetBaseMethodAddress(),
2558                                                  constant_area));
2559     __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2560   } else {
2561      __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
2562                                                   neg->GetBaseMethodAddress(),
2563                                                   constant_area));
2564      __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2565   }
2566 }
2567 
VisitTypeConversion(HTypeConversion * conversion)2568 void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
2569   DataType::Type result_type = conversion->GetResultType();
2570   DataType::Type input_type = conversion->GetInputType();
2571   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2572       << input_type << " -> " << result_type;
2573 
2574   // The float-to-long and double-to-long type conversions rely on a
2575   // call to the runtime.
2576   LocationSummary::CallKind call_kind =
2577       ((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64)
2578        && result_type == DataType::Type::kInt64)
2579       ? LocationSummary::kCallOnMainOnly
2580       : LocationSummary::kNoCall;
2581   LocationSummary* locations =
2582       new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind);
2583 
2584   switch (result_type) {
2585     case DataType::Type::kUint8:
2586     case DataType::Type::kInt8:
2587       switch (input_type) {
2588         case DataType::Type::kUint8:
2589         case DataType::Type::kInt8:
2590         case DataType::Type::kUint16:
2591         case DataType::Type::kInt16:
2592         case DataType::Type::kInt32:
2593           locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
2594           // Make the output overlap to please the register allocator. This greatly simplifies
2595           // the validation of the linear scan implementation
2596           locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2597           break;
2598         case DataType::Type::kInt64: {
2599           HInstruction* input = conversion->InputAt(0);
2600           Location input_location = input->IsConstant()
2601               ? Location::ConstantLocation(input->AsConstant())
2602               : Location::RegisterPairLocation(EAX, EDX);
2603           locations->SetInAt(0, input_location);
2604           // Make the output overlap to please the register allocator. This greatly simplifies
2605           // the validation of the linear scan implementation
2606           locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2607           break;
2608         }
2609 
2610         default:
2611           LOG(FATAL) << "Unexpected type conversion from " << input_type
2612                      << " to " << result_type;
2613       }
2614       break;
2615 
2616     case DataType::Type::kUint16:
2617     case DataType::Type::kInt16:
2618       DCHECK(DataType::IsIntegralType(input_type)) << input_type;
2619       locations->SetInAt(0, Location::Any());
2620       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2621       break;
2622 
2623     case DataType::Type::kInt32:
2624       switch (input_type) {
2625         case DataType::Type::kInt64:
2626           locations->SetInAt(0, Location::Any());
2627           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2628           break;
2629 
2630         case DataType::Type::kFloat32:
2631           locations->SetInAt(0, Location::RequiresFpuRegister());
2632           locations->SetOut(Location::RequiresRegister());
2633           locations->AddTemp(Location::RequiresFpuRegister());
2634           break;
2635 
2636         case DataType::Type::kFloat64:
2637           locations->SetInAt(0, Location::RequiresFpuRegister());
2638           locations->SetOut(Location::RequiresRegister());
2639           locations->AddTemp(Location::RequiresFpuRegister());
2640           break;
2641 
2642         default:
2643           LOG(FATAL) << "Unexpected type conversion from " << input_type
2644                      << " to " << result_type;
2645       }
2646       break;
2647 
2648     case DataType::Type::kInt64:
2649       switch (input_type) {
2650         case DataType::Type::kBool:
2651         case DataType::Type::kUint8:
2652         case DataType::Type::kInt8:
2653         case DataType::Type::kUint16:
2654         case DataType::Type::kInt16:
2655         case DataType::Type::kInt32:
2656           locations->SetInAt(0, Location::RegisterLocation(EAX));
2657           locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2658           break;
2659 
2660         case DataType::Type::kFloat32:
2661         case DataType::Type::kFloat64: {
2662           InvokeRuntimeCallingConvention calling_convention;
2663           XmmRegister parameter = calling_convention.GetFpuRegisterAt(0);
2664           locations->SetInAt(0, Location::FpuRegisterLocation(parameter));
2665 
2666           // The runtime helper puts the result in EAX, EDX.
2667           locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
2668         }
2669         break;
2670 
2671         default:
2672           LOG(FATAL) << "Unexpected type conversion from " << input_type
2673                      << " to " << result_type;
2674       }
2675       break;
2676 
2677     case DataType::Type::kFloat32:
2678       switch (input_type) {
2679         case DataType::Type::kBool:
2680         case DataType::Type::kUint8:
2681         case DataType::Type::kInt8:
2682         case DataType::Type::kUint16:
2683         case DataType::Type::kInt16:
2684         case DataType::Type::kInt32:
2685           locations->SetInAt(0, Location::RequiresRegister());
2686           locations->SetOut(Location::RequiresFpuRegister());
2687           break;
2688 
2689         case DataType::Type::kInt64:
2690           locations->SetInAt(0, Location::Any());
2691           locations->SetOut(Location::Any());
2692           break;
2693 
2694         case DataType::Type::kFloat64:
2695           locations->SetInAt(0, Location::RequiresFpuRegister());
2696           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2697           break;
2698 
2699         default:
2700           LOG(FATAL) << "Unexpected type conversion from " << input_type
2701                      << " to " << result_type;
2702       }
2703       break;
2704 
2705     case DataType::Type::kFloat64:
2706       switch (input_type) {
2707         case DataType::Type::kBool:
2708         case DataType::Type::kUint8:
2709         case DataType::Type::kInt8:
2710         case DataType::Type::kUint16:
2711         case DataType::Type::kInt16:
2712         case DataType::Type::kInt32:
2713           locations->SetInAt(0, Location::RequiresRegister());
2714           locations->SetOut(Location::RequiresFpuRegister());
2715           break;
2716 
2717         case DataType::Type::kInt64:
2718           locations->SetInAt(0, Location::Any());
2719           locations->SetOut(Location::Any());
2720           break;
2721 
2722         case DataType::Type::kFloat32:
2723           locations->SetInAt(0, Location::RequiresFpuRegister());
2724           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2725           break;
2726 
2727         default:
2728           LOG(FATAL) << "Unexpected type conversion from " << input_type
2729                      << " to " << result_type;
2730       }
2731       break;
2732 
2733     default:
2734       LOG(FATAL) << "Unexpected type conversion from " << input_type
2735                  << " to " << result_type;
2736   }
2737 }
2738 
VisitTypeConversion(HTypeConversion * conversion)2739 void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversion) {
2740   LocationSummary* locations = conversion->GetLocations();
2741   Location out = locations->Out();
2742   Location in = locations->InAt(0);
2743   DataType::Type result_type = conversion->GetResultType();
2744   DataType::Type input_type = conversion->GetInputType();
2745   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2746       << input_type << " -> " << result_type;
2747   switch (result_type) {
2748     case DataType::Type::kUint8:
2749       switch (input_type) {
2750         case DataType::Type::kInt8:
2751         case DataType::Type::kUint16:
2752         case DataType::Type::kInt16:
2753         case DataType::Type::kInt32:
2754           if (in.IsRegister()) {
2755             __ movzxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
2756           } else {
2757             DCHECK(in.GetConstant()->IsIntConstant());
2758             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2759             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
2760           }
2761           break;
2762         case DataType::Type::kInt64:
2763           if (in.IsRegisterPair()) {
2764             __ movzxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
2765           } else {
2766             DCHECK(in.GetConstant()->IsLongConstant());
2767             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2768             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value)));
2769           }
2770           break;
2771 
2772         default:
2773           LOG(FATAL) << "Unexpected type conversion from " << input_type
2774                      << " to " << result_type;
2775       }
2776       break;
2777 
2778     case DataType::Type::kInt8:
2779       switch (input_type) {
2780         case DataType::Type::kUint8:
2781         case DataType::Type::kUint16:
2782         case DataType::Type::kInt16:
2783         case DataType::Type::kInt32:
2784           if (in.IsRegister()) {
2785             __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
2786           } else {
2787             DCHECK(in.GetConstant()->IsIntConstant());
2788             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2789             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
2790           }
2791           break;
2792         case DataType::Type::kInt64:
2793           if (in.IsRegisterPair()) {
2794             __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>());
2795           } else {
2796             DCHECK(in.GetConstant()->IsLongConstant());
2797             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2798             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
2799           }
2800           break;
2801 
2802         default:
2803           LOG(FATAL) << "Unexpected type conversion from " << input_type
2804                      << " to " << result_type;
2805       }
2806       break;
2807 
2808     case DataType::Type::kUint16:
2809       switch (input_type) {
2810         case DataType::Type::kInt8:
2811         case DataType::Type::kInt16:
2812         case DataType::Type::kInt32:
2813           if (in.IsRegister()) {
2814             __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
2815           } else if (in.IsStackSlot()) {
2816             __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2817           } else {
2818             DCHECK(in.GetConstant()->IsIntConstant());
2819             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2820             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
2821           }
2822           break;
2823         case DataType::Type::kInt64:
2824           if (in.IsRegisterPair()) {
2825             __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2826           } else if (in.IsDoubleStackSlot()) {
2827             __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2828           } else {
2829             DCHECK(in.GetConstant()->IsLongConstant());
2830             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2831             __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
2832           }
2833           break;
2834 
2835         default:
2836           LOG(FATAL) << "Unexpected type conversion from " << input_type
2837                      << " to " << result_type;
2838       }
2839       break;
2840 
2841     case DataType::Type::kInt16:
2842       switch (input_type) {
2843         case DataType::Type::kUint16:
2844         case DataType::Type::kInt32:
2845           if (in.IsRegister()) {
2846             __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
2847           } else if (in.IsStackSlot()) {
2848             __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2849           } else {
2850             DCHECK(in.GetConstant()->IsIntConstant());
2851             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
2852             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
2853           }
2854           break;
2855         case DataType::Type::kInt64:
2856           if (in.IsRegisterPair()) {
2857             __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2858           } else if (in.IsDoubleStackSlot()) {
2859             __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2860           } else {
2861             DCHECK(in.GetConstant()->IsLongConstant());
2862             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2863             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
2864           }
2865           break;
2866 
2867         default:
2868           LOG(FATAL) << "Unexpected type conversion from " << input_type
2869                      << " to " << result_type;
2870       }
2871       break;
2872 
2873     case DataType::Type::kInt32:
2874       switch (input_type) {
2875         case DataType::Type::kInt64:
2876           if (in.IsRegisterPair()) {
2877             __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
2878           } else if (in.IsDoubleStackSlot()) {
2879             __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
2880           } else {
2881             DCHECK(in.IsConstant());
2882             DCHECK(in.GetConstant()->IsLongConstant());
2883             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2884             __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value)));
2885           }
2886           break;
2887 
2888         case DataType::Type::kFloat32: {
2889           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2890           Register output = out.AsRegister<Register>();
2891           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2892           NearLabel done, nan;
2893 
2894           __ movl(output, Immediate(kPrimIntMax));
2895           // temp = int-to-float(output)
2896           __ cvtsi2ss(temp, output);
2897           // if input >= temp goto done
2898           __ comiss(input, temp);
2899           __ j(kAboveEqual, &done);
2900           // if input == NaN goto nan
2901           __ j(kUnordered, &nan);
2902           // output = float-to-int-truncate(input)
2903           __ cvttss2si(output, input);
2904           __ jmp(&done);
2905           __ Bind(&nan);
2906           //  output = 0
2907           __ xorl(output, output);
2908           __ Bind(&done);
2909           break;
2910         }
2911 
2912         case DataType::Type::kFloat64: {
2913           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2914           Register output = out.AsRegister<Register>();
2915           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2916           NearLabel done, nan;
2917 
2918           __ movl(output, Immediate(kPrimIntMax));
2919           // temp = int-to-double(output)
2920           __ cvtsi2sd(temp, output);
2921           // if input >= temp goto done
2922           __ comisd(input, temp);
2923           __ j(kAboveEqual, &done);
2924           // if input == NaN goto nan
2925           __ j(kUnordered, &nan);
2926           // output = double-to-int-truncate(input)
2927           __ cvttsd2si(output, input);
2928           __ jmp(&done);
2929           __ Bind(&nan);
2930           //  output = 0
2931           __ xorl(output, output);
2932           __ Bind(&done);
2933           break;
2934         }
2935 
2936         default:
2937           LOG(FATAL) << "Unexpected type conversion from " << input_type
2938                      << " to " << result_type;
2939       }
2940       break;
2941 
2942     case DataType::Type::kInt64:
2943       switch (input_type) {
2944         case DataType::Type::kBool:
2945         case DataType::Type::kUint8:
2946         case DataType::Type::kInt8:
2947         case DataType::Type::kUint16:
2948         case DataType::Type::kInt16:
2949         case DataType::Type::kInt32:
2950           DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
2951           DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
2952           DCHECK_EQ(in.AsRegister<Register>(), EAX);
2953           __ cdq();
2954           break;
2955 
2956         case DataType::Type::kFloat32:
2957           codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc());
2958           CheckEntrypointTypes<kQuickF2l, int64_t, float>();
2959           break;
2960 
2961         case DataType::Type::kFloat64:
2962           codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc());
2963           CheckEntrypointTypes<kQuickD2l, int64_t, double>();
2964           break;
2965 
2966         default:
2967           LOG(FATAL) << "Unexpected type conversion from " << input_type
2968                      << " to " << result_type;
2969       }
2970       break;
2971 
2972     case DataType::Type::kFloat32:
2973       switch (input_type) {
2974         case DataType::Type::kBool:
2975         case DataType::Type::kUint8:
2976         case DataType::Type::kInt8:
2977         case DataType::Type::kUint16:
2978         case DataType::Type::kInt16:
2979         case DataType::Type::kInt32:
2980           __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
2981           break;
2982 
2983         case DataType::Type::kInt64: {
2984           size_t adjustment = 0;
2985 
2986           // Create stack space for the call to
2987           // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
2988           // TODO: enhance register allocator to ask for stack temporaries.
2989           if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
2990             adjustment = DataType::Size(DataType::Type::kInt64);
2991             __ subl(ESP, Immediate(adjustment));
2992           }
2993 
2994           // Load the value to the FP stack, using temporaries if needed.
2995           PushOntoFPStack(in, 0, adjustment, false, true);
2996 
2997           if (out.IsStackSlot()) {
2998             __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
2999           } else {
3000             __ fstps(Address(ESP, 0));
3001             Location stack_temp = Location::StackSlot(0);
3002             codegen_->Move32(out, stack_temp);
3003           }
3004 
3005           // Remove the temporary stack space we allocated.
3006           if (adjustment != 0) {
3007             __ addl(ESP, Immediate(adjustment));
3008           }
3009           break;
3010         }
3011 
3012         case DataType::Type::kFloat64:
3013           __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3014           break;
3015 
3016         default:
3017           LOG(FATAL) << "Unexpected type conversion from " << input_type
3018                      << " to " << result_type;
3019       }
3020       break;
3021 
3022     case DataType::Type::kFloat64:
3023       switch (input_type) {
3024         case DataType::Type::kBool:
3025         case DataType::Type::kUint8:
3026         case DataType::Type::kInt8:
3027         case DataType::Type::kUint16:
3028         case DataType::Type::kInt16:
3029         case DataType::Type::kInt32:
3030           __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
3031           break;
3032 
3033         case DataType::Type::kInt64: {
3034           size_t adjustment = 0;
3035 
3036           // Create stack space for the call to
3037           // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
3038           // TODO: enhance register allocator to ask for stack temporaries.
3039           if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
3040             adjustment = DataType::Size(DataType::Type::kInt64);
3041             __ subl(ESP, Immediate(adjustment));
3042           }
3043 
3044           // Load the value to the FP stack, using temporaries if needed.
3045           PushOntoFPStack(in, 0, adjustment, false, true);
3046 
3047           if (out.IsDoubleStackSlot()) {
3048             __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
3049           } else {
3050             __ fstpl(Address(ESP, 0));
3051             Location stack_temp = Location::DoubleStackSlot(0);
3052             codegen_->Move64(out, stack_temp);
3053           }
3054 
3055           // Remove the temporary stack space we allocated.
3056           if (adjustment != 0) {
3057             __ addl(ESP, Immediate(adjustment));
3058           }
3059           break;
3060         }
3061 
3062         case DataType::Type::kFloat32:
3063           __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3064           break;
3065 
3066         default:
3067           LOG(FATAL) << "Unexpected type conversion from " << input_type
3068                      << " to " << result_type;
3069       }
3070       break;
3071 
3072     default:
3073       LOG(FATAL) << "Unexpected type conversion from " << input_type
3074                  << " to " << result_type;
3075   }
3076 }
3077 
VisitAdd(HAdd * add)3078 void LocationsBuilderX86::VisitAdd(HAdd* add) {
3079   LocationSummary* locations =
3080       new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3081   switch (add->GetResultType()) {
3082     case DataType::Type::kInt32: {
3083       locations->SetInAt(0, Location::RequiresRegister());
3084       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3085       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3086       break;
3087     }
3088 
3089     case DataType::Type::kInt64: {
3090       locations->SetInAt(0, Location::RequiresRegister());
3091       locations->SetInAt(1, Location::Any());
3092       locations->SetOut(Location::SameAsFirstInput());
3093       break;
3094     }
3095 
3096     case DataType::Type::kFloat32:
3097     case DataType::Type::kFloat64: {
3098       locations->SetInAt(0, Location::RequiresFpuRegister());
3099       if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3100         DCHECK(add->InputAt(1)->IsEmittedAtUseSite());
3101       } else if (add->InputAt(1)->IsConstant()) {
3102         locations->SetInAt(1, Location::RequiresFpuRegister());
3103       } else {
3104         locations->SetInAt(1, Location::Any());
3105       }
3106       locations->SetOut(Location::SameAsFirstInput());
3107       break;
3108     }
3109 
3110     default:
3111       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3112       UNREACHABLE();
3113   }
3114 }
3115 
VisitAdd(HAdd * add)3116 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
3117   LocationSummary* locations = add->GetLocations();
3118   Location first = locations->InAt(0);
3119   Location second = locations->InAt(1);
3120   Location out = locations->Out();
3121 
3122   switch (add->GetResultType()) {
3123     case DataType::Type::kInt32: {
3124       if (second.IsRegister()) {
3125         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3126           __ addl(out.AsRegister<Register>(), second.AsRegister<Register>());
3127         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3128           __ addl(out.AsRegister<Register>(), first.AsRegister<Register>());
3129         } else {
3130           __ leal(out.AsRegister<Register>(), Address(
3131               first.AsRegister<Register>(), second.AsRegister<Register>(), TIMES_1, 0));
3132           }
3133       } else if (second.IsConstant()) {
3134         int32_t value = second.GetConstant()->AsIntConstant()->GetValue();
3135         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3136           __ addl(out.AsRegister<Register>(), Immediate(value));
3137         } else {
3138           __ leal(out.AsRegister<Register>(), Address(first.AsRegister<Register>(), value));
3139         }
3140       } else {
3141         DCHECK(first.Equals(locations->Out()));
3142         __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3143       }
3144       break;
3145     }
3146 
3147     case DataType::Type::kInt64: {
3148       if (second.IsRegisterPair()) {
3149         __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3150         __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3151       } else if (second.IsDoubleStackSlot()) {
3152         __ addl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3153         __ adcl(first.AsRegisterPairHigh<Register>(),
3154                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3155       } else {
3156         DCHECK(second.IsConstant()) << second;
3157         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3158         __ addl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3159         __ adcl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3160       }
3161       break;
3162     }
3163 
3164     case DataType::Type::kFloat32: {
3165       if (second.IsFpuRegister()) {
3166         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3167       } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3168         HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3169         DCHECK(const_area->IsEmittedAtUseSite());
3170         __ addss(first.AsFpuRegister<XmmRegister>(),
3171                  codegen_->LiteralFloatAddress(
3172                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
3173                      const_area->GetBaseMethodAddress(),
3174                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3175       } else {
3176         DCHECK(second.IsStackSlot());
3177         __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3178       }
3179       break;
3180     }
3181 
3182     case DataType::Type::kFloat64: {
3183       if (second.IsFpuRegister()) {
3184         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3185       } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) {
3186         HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable();
3187         DCHECK(const_area->IsEmittedAtUseSite());
3188         __ addsd(first.AsFpuRegister<XmmRegister>(),
3189                  codegen_->LiteralDoubleAddress(
3190                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3191                      const_area->GetBaseMethodAddress(),
3192                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3193       } else {
3194         DCHECK(second.IsDoubleStackSlot());
3195         __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3196       }
3197       break;
3198     }
3199 
3200     default:
3201       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3202   }
3203 }
3204 
VisitSub(HSub * sub)3205 void LocationsBuilderX86::VisitSub(HSub* sub) {
3206   LocationSummary* locations =
3207       new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3208   switch (sub->GetResultType()) {
3209     case DataType::Type::kInt32:
3210     case DataType::Type::kInt64: {
3211       locations->SetInAt(0, Location::RequiresRegister());
3212       locations->SetInAt(1, Location::Any());
3213       locations->SetOut(Location::SameAsFirstInput());
3214       break;
3215     }
3216     case DataType::Type::kFloat32:
3217     case DataType::Type::kFloat64: {
3218       locations->SetInAt(0, Location::RequiresFpuRegister());
3219       if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3220         DCHECK(sub->InputAt(1)->IsEmittedAtUseSite());
3221       } else if (sub->InputAt(1)->IsConstant()) {
3222         locations->SetInAt(1, Location::RequiresFpuRegister());
3223       } else {
3224         locations->SetInAt(1, Location::Any());
3225       }
3226       locations->SetOut(Location::SameAsFirstInput());
3227       break;
3228     }
3229 
3230     default:
3231       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3232   }
3233 }
3234 
VisitSub(HSub * sub)3235 void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
3236   LocationSummary* locations = sub->GetLocations();
3237   Location first = locations->InAt(0);
3238   Location second = locations->InAt(1);
3239   DCHECK(first.Equals(locations->Out()));
3240   switch (sub->GetResultType()) {
3241     case DataType::Type::kInt32: {
3242       if (second.IsRegister()) {
3243         __ subl(first.AsRegister<Register>(), second.AsRegister<Register>());
3244       } else if (second.IsConstant()) {
3245         __ subl(first.AsRegister<Register>(),
3246                 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3247       } else {
3248         __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3249       }
3250       break;
3251     }
3252 
3253     case DataType::Type::kInt64: {
3254       if (second.IsRegisterPair()) {
3255         __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
3256         __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
3257       } else if (second.IsDoubleStackSlot()) {
3258         __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
3259         __ sbbl(first.AsRegisterPairHigh<Register>(),
3260                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
3261       } else {
3262         DCHECK(second.IsConstant()) << second;
3263         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3264         __ subl(first.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
3265         __ sbbl(first.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
3266       }
3267       break;
3268     }
3269 
3270     case DataType::Type::kFloat32: {
3271       if (second.IsFpuRegister()) {
3272         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3273       } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3274         HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3275         DCHECK(const_area->IsEmittedAtUseSite());
3276         __ subss(first.AsFpuRegister<XmmRegister>(),
3277                  codegen_->LiteralFloatAddress(
3278                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
3279                      const_area->GetBaseMethodAddress(),
3280                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3281       } else {
3282         DCHECK(second.IsStackSlot());
3283         __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3284       }
3285       break;
3286     }
3287 
3288     case DataType::Type::kFloat64: {
3289       if (second.IsFpuRegister()) {
3290         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3291       } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) {
3292         HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable();
3293         DCHECK(const_area->IsEmittedAtUseSite());
3294         __ subsd(first.AsFpuRegister<XmmRegister>(),
3295                  codegen_->LiteralDoubleAddress(
3296                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3297                      const_area->GetBaseMethodAddress(),
3298                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3299       } else {
3300         DCHECK(second.IsDoubleStackSlot());
3301         __ subsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3302       }
3303       break;
3304     }
3305 
3306     default:
3307       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3308   }
3309 }
3310 
VisitMul(HMul * mul)3311 void LocationsBuilderX86::VisitMul(HMul* mul) {
3312   LocationSummary* locations =
3313       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3314   switch (mul->GetResultType()) {
3315     case DataType::Type::kInt32:
3316       locations->SetInAt(0, Location::RequiresRegister());
3317       locations->SetInAt(1, Location::Any());
3318       if (mul->InputAt(1)->IsIntConstant()) {
3319         // Can use 3 operand multiply.
3320         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3321       } else {
3322         locations->SetOut(Location::SameAsFirstInput());
3323       }
3324       break;
3325     case DataType::Type::kInt64: {
3326       locations->SetInAt(0, Location::RequiresRegister());
3327       locations->SetInAt(1, Location::Any());
3328       locations->SetOut(Location::SameAsFirstInput());
3329       // Needed for imul on 32bits with 64bits output.
3330       locations->AddTemp(Location::RegisterLocation(EAX));
3331       locations->AddTemp(Location::RegisterLocation(EDX));
3332       break;
3333     }
3334     case DataType::Type::kFloat32:
3335     case DataType::Type::kFloat64: {
3336       locations->SetInAt(0, Location::RequiresFpuRegister());
3337       if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3338         DCHECK(mul->InputAt(1)->IsEmittedAtUseSite());
3339       } else if (mul->InputAt(1)->IsConstant()) {
3340         locations->SetInAt(1, Location::RequiresFpuRegister());
3341       } else {
3342         locations->SetInAt(1, Location::Any());
3343       }
3344       locations->SetOut(Location::SameAsFirstInput());
3345       break;
3346     }
3347 
3348     default:
3349       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3350   }
3351 }
3352 
VisitMul(HMul * mul)3353 void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
3354   LocationSummary* locations = mul->GetLocations();
3355   Location first = locations->InAt(0);
3356   Location second = locations->InAt(1);
3357   Location out = locations->Out();
3358 
3359   switch (mul->GetResultType()) {
3360     case DataType::Type::kInt32:
3361       // The constant may have ended up in a register, so test explicitly to avoid
3362       // problems where the output may not be the same as the first operand.
3363       if (mul->InputAt(1)->IsIntConstant()) {
3364         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3365         __ imull(out.AsRegister<Register>(), first.AsRegister<Register>(), imm);
3366       } else if (second.IsRegister()) {
3367         DCHECK(first.Equals(out));
3368         __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
3369       } else {
3370         DCHECK(second.IsStackSlot());
3371         DCHECK(first.Equals(out));
3372         __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
3373       }
3374       break;
3375 
3376     case DataType::Type::kInt64: {
3377       Register in1_hi = first.AsRegisterPairHigh<Register>();
3378       Register in1_lo = first.AsRegisterPairLow<Register>();
3379       Register eax = locations->GetTemp(0).AsRegister<Register>();
3380       Register edx = locations->GetTemp(1).AsRegister<Register>();
3381 
3382       DCHECK_EQ(EAX, eax);
3383       DCHECK_EQ(EDX, edx);
3384 
3385       // input: in1 - 64 bits, in2 - 64 bits.
3386       // output: in1
3387       // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo
3388       // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32]
3389       // parts: in1.lo = (in1.lo * in2.lo)[31:0]
3390       if (second.IsConstant()) {
3391         DCHECK(second.GetConstant()->IsLongConstant());
3392 
3393         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3394         int32_t low_value = Low32Bits(value);
3395         int32_t high_value = High32Bits(value);
3396         Immediate low(low_value);
3397         Immediate high(high_value);
3398 
3399         __ movl(eax, high);
3400         // eax <- in1.lo * in2.hi
3401         __ imull(eax, in1_lo);
3402         // in1.hi <- in1.hi * in2.lo
3403         __ imull(in1_hi, low);
3404         // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3405         __ addl(in1_hi, eax);
3406         // move in2_lo to eax to prepare for double precision
3407         __ movl(eax, low);
3408         // edx:eax <- in1.lo * in2.lo
3409         __ mull(in1_lo);
3410         // in1.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3411         __ addl(in1_hi, edx);
3412         // in1.lo <- (in1.lo * in2.lo)[31:0];
3413         __ movl(in1_lo, eax);
3414       } else if (second.IsRegisterPair()) {
3415         Register in2_hi = second.AsRegisterPairHigh<Register>();
3416         Register in2_lo = second.AsRegisterPairLow<Register>();
3417 
3418         __ movl(eax, in2_hi);
3419         // eax <- in1.lo * in2.hi
3420         __ imull(eax, in1_lo);
3421         // in1.hi <- in1.hi * in2.lo
3422         __ imull(in1_hi, in2_lo);
3423         // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3424         __ addl(in1_hi, eax);
3425         // move in1_lo to eax to prepare for double precision
3426         __ movl(eax, in1_lo);
3427         // edx:eax <- in1.lo * in2.lo
3428         __ mull(in2_lo);
3429         // in1.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3430         __ addl(in1_hi, edx);
3431         // in1.lo <- (in1.lo * in2.lo)[31:0];
3432         __ movl(in1_lo, eax);
3433       } else {
3434         DCHECK(second.IsDoubleStackSlot()) << second;
3435         Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
3436         Address in2_lo(ESP, second.GetStackIndex());
3437 
3438         __ movl(eax, in2_hi);
3439         // eax <- in1.lo * in2.hi
3440         __ imull(eax, in1_lo);
3441         // in1.hi <- in1.hi * in2.lo
3442         __ imull(in1_hi, in2_lo);
3443         // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo
3444         __ addl(in1_hi, eax);
3445         // move in1_lo to eax to prepare for double precision
3446         __ movl(eax, in1_lo);
3447         // edx:eax <- in1.lo * in2.lo
3448         __ mull(in2_lo);
3449         // in1.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
3450         __ addl(in1_hi, edx);
3451         // in1.lo <- (in1.lo * in2.lo)[31:0];
3452         __ movl(in1_lo, eax);
3453       }
3454 
3455       break;
3456     }
3457 
3458     case DataType::Type::kFloat32: {
3459       DCHECK(first.Equals(locations->Out()));
3460       if (second.IsFpuRegister()) {
3461         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3462       } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3463         HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3464         DCHECK(const_area->IsEmittedAtUseSite());
3465         __ mulss(first.AsFpuRegister<XmmRegister>(),
3466                  codegen_->LiteralFloatAddress(
3467                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
3468                      const_area->GetBaseMethodAddress(),
3469                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3470       } else {
3471         DCHECK(second.IsStackSlot());
3472         __ mulss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3473       }
3474       break;
3475     }
3476 
3477     case DataType::Type::kFloat64: {
3478       DCHECK(first.Equals(locations->Out()));
3479       if (second.IsFpuRegister()) {
3480         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3481       } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) {
3482         HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable();
3483         DCHECK(const_area->IsEmittedAtUseSite());
3484         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3485                  codegen_->LiteralDoubleAddress(
3486                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3487                      const_area->GetBaseMethodAddress(),
3488                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3489       } else {
3490         DCHECK(second.IsDoubleStackSlot());
3491         __ mulsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3492       }
3493       break;
3494     }
3495 
3496     default:
3497       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3498   }
3499 }
3500 
PushOntoFPStack(Location source,uint32_t temp_offset,uint32_t stack_adjustment,bool is_fp,bool is_wide)3501 void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
3502                                                   uint32_t temp_offset,
3503                                                   uint32_t stack_adjustment,
3504                                                   bool is_fp,
3505                                                   bool is_wide) {
3506   if (source.IsStackSlot()) {
3507     DCHECK(!is_wide);
3508     if (is_fp) {
3509       __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3510     } else {
3511       __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
3512     }
3513   } else if (source.IsDoubleStackSlot()) {
3514     DCHECK(is_wide);
3515     if (is_fp) {
3516       __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3517     } else {
3518       __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
3519     }
3520   } else {
3521     // Write the value to the temporary location on the stack and load to FP stack.
3522     if (!is_wide) {
3523       Location stack_temp = Location::StackSlot(temp_offset);
3524       codegen_->Move32(stack_temp, source);
3525       if (is_fp) {
3526         __ flds(Address(ESP, temp_offset));
3527       } else {
3528         __ filds(Address(ESP, temp_offset));
3529       }
3530     } else {
3531       Location stack_temp = Location::DoubleStackSlot(temp_offset);
3532       codegen_->Move64(stack_temp, source);
3533       if (is_fp) {
3534         __ fldl(Address(ESP, temp_offset));
3535       } else {
3536         __ fildl(Address(ESP, temp_offset));
3537       }
3538     }
3539   }
3540 }
3541 
GenerateRemFP(HRem * rem)3542 void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
3543   DataType::Type type = rem->GetResultType();
3544   bool is_float = type == DataType::Type::kFloat32;
3545   size_t elem_size = DataType::Size(type);
3546   LocationSummary* locations = rem->GetLocations();
3547   Location first = locations->InAt(0);
3548   Location second = locations->InAt(1);
3549   Location out = locations->Out();
3550 
3551   // Create stack space for 2 elements.
3552   // TODO: enhance register allocator to ask for stack temporaries.
3553   __ subl(ESP, Immediate(2 * elem_size));
3554 
3555   // Load the values to the FP stack in reverse order, using temporaries if needed.
3556   const bool is_wide = !is_float;
3557   PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp= */ true, is_wide);
3558   PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp= */ true, is_wide);
3559 
3560   // Loop doing FPREM until we stabilize.
3561   NearLabel retry;
3562   __ Bind(&retry);
3563   __ fprem();
3564 
3565   // Move FP status to AX.
3566   __ fstsw();
3567 
3568   // And see if the argument reduction is complete. This is signaled by the
3569   // C2 FPU flag bit set to 0.
3570   __ andl(EAX, Immediate(kC2ConditionMask));
3571   __ j(kNotEqual, &retry);
3572 
3573   // We have settled on the final value. Retrieve it into an XMM register.
3574   // Store FP top of stack to real stack.
3575   if (is_float) {
3576     __ fsts(Address(ESP, 0));
3577   } else {
3578     __ fstl(Address(ESP, 0));
3579   }
3580 
3581   // Pop the 2 items from the FP stack.
3582   __ fucompp();
3583 
3584   // Load the value from the stack into an XMM register.
3585   DCHECK(out.IsFpuRegister()) << out;
3586   if (is_float) {
3587     __ movss(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3588   } else {
3589     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
3590   }
3591 
3592   // And remove the temporary stack space we allocated.
3593   __ addl(ESP, Immediate(2 * elem_size));
3594 }
3595 
3596 
DivRemOneOrMinusOne(HBinaryOperation * instruction)3597 void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3598   DCHECK(instruction->IsDiv() || instruction->IsRem());
3599 
3600   LocationSummary* locations = instruction->GetLocations();
3601   DCHECK(locations->InAt(1).IsConstant());
3602   DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
3603 
3604   Register out_register = locations->Out().AsRegister<Register>();
3605   Register input_register = locations->InAt(0).AsRegister<Register>();
3606   int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3607 
3608   DCHECK(imm == 1 || imm == -1);
3609 
3610   if (instruction->IsRem()) {
3611     __ xorl(out_register, out_register);
3612   } else {
3613     __ movl(out_register, input_register);
3614     if (imm == -1) {
3615       __ negl(out_register);
3616     }
3617   }
3618 }
3619 
RemByPowerOfTwo(HRem * instruction)3620 void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {
3621   LocationSummary* locations = instruction->GetLocations();
3622   Location second = locations->InAt(1);
3623 
3624   Register out = locations->Out().AsRegister<Register>();
3625   Register numerator = locations->InAt(0).AsRegister<Register>();
3626 
3627   int32_t imm = Int64FromConstant(second.GetConstant());
3628   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3629   uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3630 
3631   Register tmp = locations->GetTemp(0).AsRegister<Register>();
3632   NearLabel done;
3633   __ movl(out, numerator);
3634   __ andl(out, Immediate(abs_imm-1));
3635   __ j(Condition::kZero, &done);
3636   __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
3637   __ testl(numerator, numerator);
3638   __ cmovl(Condition::kLess, out, tmp);
3639   __ Bind(&done);
3640 }
3641 
DivByPowerOfTwo(HDiv * instruction)3642 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
3643   LocationSummary* locations = instruction->GetLocations();
3644 
3645   Register out_register = locations->Out().AsRegister<Register>();
3646   Register input_register = locations->InAt(0).AsRegister<Register>();
3647   int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3648   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3649   uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
3650 
3651   Register num = locations->GetTemp(0).AsRegister<Register>();
3652 
3653   __ leal(num, Address(input_register, abs_imm - 1));
3654   __ testl(input_register, input_register);
3655   __ cmovl(kGreaterEqual, num, input_register);
3656   int shift = CTZ(imm);
3657   __ sarl(num, Immediate(shift));
3658 
3659   if (imm < 0) {
3660     __ negl(num);
3661   }
3662 
3663   __ movl(out_register, num);
3664 }
3665 
GenerateDivRemWithAnyConstant(HBinaryOperation * instruction)3666 void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3667   DCHECK(instruction->IsDiv() || instruction->IsRem());
3668 
3669   LocationSummary* locations = instruction->GetLocations();
3670   int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
3671 
3672   Register eax = locations->InAt(0).AsRegister<Register>();
3673   Register out = locations->Out().AsRegister<Register>();
3674   Register num;
3675   Register edx;
3676 
3677   if (instruction->IsDiv()) {
3678     edx = locations->GetTemp(0).AsRegister<Register>();
3679     num = locations->GetTemp(1).AsRegister<Register>();
3680   } else {
3681     edx = locations->Out().AsRegister<Register>();
3682     num = locations->GetTemp(0).AsRegister<Register>();
3683   }
3684 
3685   DCHECK_EQ(EAX, eax);
3686   DCHECK_EQ(EDX, edx);
3687   if (instruction->IsDiv()) {
3688     DCHECK_EQ(EAX, out);
3689   } else {
3690     DCHECK_EQ(EDX, out);
3691   }
3692 
3693   int64_t magic;
3694   int shift;
3695   CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
3696 
3697   // Save the numerator.
3698   __ movl(num, eax);
3699 
3700   // EAX = magic
3701   __ movl(eax, Immediate(magic));
3702 
3703   // EDX:EAX = magic * numerator
3704   __ imull(num);
3705 
3706   if (imm > 0 && magic < 0) {
3707     // EDX += num
3708     __ addl(edx, num);
3709   } else if (imm < 0 && magic > 0) {
3710     __ subl(edx, num);
3711   }
3712 
3713   // Shift if needed.
3714   if (shift != 0) {
3715     __ sarl(edx, Immediate(shift));
3716   }
3717 
3718   // EDX += 1 if EDX < 0
3719   __ movl(eax, edx);
3720   __ shrl(edx, Immediate(31));
3721   __ addl(edx, eax);
3722 
3723   if (instruction->IsRem()) {
3724     __ movl(eax, num);
3725     __ imull(edx, Immediate(imm));
3726     __ subl(eax, edx);
3727     __ movl(edx, eax);
3728   } else {
3729     __ movl(eax, edx);
3730   }
3731 }
3732 
GenerateDivRemIntegral(HBinaryOperation * instruction)3733 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3734   DCHECK(instruction->IsDiv() || instruction->IsRem());
3735 
3736   LocationSummary* locations = instruction->GetLocations();
3737   Location out = locations->Out();
3738   Location first = locations->InAt(0);
3739   Location second = locations->InAt(1);
3740   bool is_div = instruction->IsDiv();
3741 
3742   switch (instruction->GetResultType()) {
3743     case DataType::Type::kInt32: {
3744       DCHECK_EQ(EAX, first.AsRegister<Register>());
3745       DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
3746 
3747       if (second.IsConstant()) {
3748         int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
3749 
3750         if (imm == 0) {
3751           // Do not generate anything for 0. DivZeroCheck would forbid any generated code.
3752         } else if (imm == 1 || imm == -1) {
3753           DivRemOneOrMinusOne(instruction);
3754         } else if (IsPowerOfTwo(AbsOrMin(imm))) {
3755           if (is_div) {
3756             DivByPowerOfTwo(instruction->AsDiv());
3757           } else {
3758             RemByPowerOfTwo(instruction->AsRem());
3759           }
3760         } else {
3761           DCHECK(imm <= -2 || imm >= 2);
3762           GenerateDivRemWithAnyConstant(instruction);
3763         }
3764       } else {
3765         SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86(
3766             instruction, out.AsRegister<Register>(), is_div);
3767         codegen_->AddSlowPath(slow_path);
3768 
3769         Register second_reg = second.AsRegister<Register>();
3770         // 0x80000000/-1 triggers an arithmetic exception!
3771         // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so
3772         // it's safe to just use negl instead of more complex comparisons.
3773 
3774         __ cmpl(second_reg, Immediate(-1));
3775         __ j(kEqual, slow_path->GetEntryLabel());
3776 
3777         // edx:eax <- sign-extended of eax
3778         __ cdq();
3779         // eax = quotient, edx = remainder
3780         __ idivl(second_reg);
3781         __ Bind(slow_path->GetExitLabel());
3782       }
3783       break;
3784     }
3785 
3786     case DataType::Type::kInt64: {
3787       InvokeRuntimeCallingConvention calling_convention;
3788       DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
3789       DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
3790       DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
3791       DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
3792       DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
3793       DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
3794 
3795       if (is_div) {
3796         codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc());
3797         CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
3798       } else {
3799         codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc());
3800         CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
3801       }
3802       break;
3803     }
3804 
3805     default:
3806       LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
3807   }
3808 }
3809 
VisitDiv(HDiv * div)3810 void LocationsBuilderX86::VisitDiv(HDiv* div) {
3811   LocationSummary::CallKind call_kind = (div->GetResultType() == DataType::Type::kInt64)
3812       ? LocationSummary::kCallOnMainOnly
3813       : LocationSummary::kNoCall;
3814   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind);
3815 
3816   switch (div->GetResultType()) {
3817     case DataType::Type::kInt32: {
3818       locations->SetInAt(0, Location::RegisterLocation(EAX));
3819       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3820       locations->SetOut(Location::SameAsFirstInput());
3821       // Intel uses edx:eax as the dividend.
3822       locations->AddTemp(Location::RegisterLocation(EDX));
3823       // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3824       // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as
3825       // output and request another temp.
3826       if (div->InputAt(1)->IsIntConstant()) {
3827         locations->AddTemp(Location::RequiresRegister());
3828       }
3829       break;
3830     }
3831     case DataType::Type::kInt64: {
3832       InvokeRuntimeCallingConvention calling_convention;
3833       locations->SetInAt(0, Location::RegisterPairLocation(
3834           calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
3835       locations->SetInAt(1, Location::RegisterPairLocation(
3836           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
3837       // Runtime helper puts the result in EAX, EDX.
3838       locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3839       break;
3840     }
3841     case DataType::Type::kFloat32:
3842     case DataType::Type::kFloat64: {
3843       locations->SetInAt(0, Location::RequiresFpuRegister());
3844       if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3845         DCHECK(div->InputAt(1)->IsEmittedAtUseSite());
3846       } else if (div->InputAt(1)->IsConstant()) {
3847         locations->SetInAt(1, Location::RequiresFpuRegister());
3848       } else {
3849         locations->SetInAt(1, Location::Any());
3850       }
3851       locations->SetOut(Location::SameAsFirstInput());
3852       break;
3853     }
3854 
3855     default:
3856       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3857   }
3858 }
3859 
VisitDiv(HDiv * div)3860 void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
3861   LocationSummary* locations = div->GetLocations();
3862   Location first = locations->InAt(0);
3863   Location second = locations->InAt(1);
3864 
3865   switch (div->GetResultType()) {
3866     case DataType::Type::kInt32:
3867     case DataType::Type::kInt64: {
3868       GenerateDivRemIntegral(div);
3869       break;
3870     }
3871 
3872     case DataType::Type::kFloat32: {
3873       if (second.IsFpuRegister()) {
3874         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3875       } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3876         HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
3877         DCHECK(const_area->IsEmittedAtUseSite());
3878         __ divss(first.AsFpuRegister<XmmRegister>(),
3879                  codegen_->LiteralFloatAddress(
3880                    const_area->GetConstant()->AsFloatConstant()->GetValue(),
3881                    const_area->GetBaseMethodAddress(),
3882                    const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3883       } else {
3884         DCHECK(second.IsStackSlot());
3885         __ divss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3886       }
3887       break;
3888     }
3889 
3890     case DataType::Type::kFloat64: {
3891       if (second.IsFpuRegister()) {
3892         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3893       } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) {
3894         HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable();
3895         DCHECK(const_area->IsEmittedAtUseSite());
3896         __ divsd(first.AsFpuRegister<XmmRegister>(),
3897                  codegen_->LiteralDoubleAddress(
3898                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
3899                      const_area->GetBaseMethodAddress(),
3900                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
3901       } else {
3902         DCHECK(second.IsDoubleStackSlot());
3903         __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
3904       }
3905       break;
3906     }
3907 
3908     default:
3909       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3910   }
3911 }
3912 
VisitRem(HRem * rem)3913 void LocationsBuilderX86::VisitRem(HRem* rem) {
3914   DataType::Type type = rem->GetResultType();
3915 
3916   LocationSummary::CallKind call_kind = (rem->GetResultType() == DataType::Type::kInt64)
3917       ? LocationSummary::kCallOnMainOnly
3918       : LocationSummary::kNoCall;
3919   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
3920 
3921   switch (type) {
3922     case DataType::Type::kInt32: {
3923       locations->SetInAt(0, Location::RegisterLocation(EAX));
3924       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3925       locations->SetOut(Location::RegisterLocation(EDX));
3926       // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3927       // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as
3928       // output and request another temp.
3929       if (rem->InputAt(1)->IsIntConstant()) {
3930         locations->AddTemp(Location::RequiresRegister());
3931       }
3932       break;
3933     }
3934     case DataType::Type::kInt64: {
3935       InvokeRuntimeCallingConvention calling_convention;
3936       locations->SetInAt(0, Location::RegisterPairLocation(
3937           calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
3938       locations->SetInAt(1, Location::RegisterPairLocation(
3939           calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
3940       // Runtime helper puts the result in EAX, EDX.
3941       locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
3942       break;
3943     }
3944     case DataType::Type::kFloat64:
3945     case DataType::Type::kFloat32: {
3946       locations->SetInAt(0, Location::Any());
3947       locations->SetInAt(1, Location::Any());
3948       locations->SetOut(Location::RequiresFpuRegister());
3949       locations->AddTemp(Location::RegisterLocation(EAX));
3950       break;
3951     }
3952 
3953     default:
3954       LOG(FATAL) << "Unexpected rem type " << type;
3955   }
3956 }
3957 
VisitRem(HRem * rem)3958 void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
3959   DataType::Type type = rem->GetResultType();
3960   switch (type) {
3961     case DataType::Type::kInt32:
3962     case DataType::Type::kInt64: {
3963       GenerateDivRemIntegral(rem);
3964       break;
3965     }
3966     case DataType::Type::kFloat32:
3967     case DataType::Type::kFloat64: {
3968       GenerateRemFP(rem);
3969       break;
3970     }
3971     default:
3972       LOG(FATAL) << "Unexpected rem type " << type;
3973   }
3974 }
3975 
CreateMinMaxLocations(ArenaAllocator * allocator,HBinaryOperation * minmax)3976 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
3977   LocationSummary* locations = new (allocator) LocationSummary(minmax);
3978   switch (minmax->GetResultType()) {
3979     case DataType::Type::kInt32:
3980       locations->SetInAt(0, Location::RequiresRegister());
3981       locations->SetInAt(1, Location::RequiresRegister());
3982       locations->SetOut(Location::SameAsFirstInput());
3983       break;
3984     case DataType::Type::kInt64:
3985       locations->SetInAt(0, Location::RequiresRegister());
3986       locations->SetInAt(1, Location::RequiresRegister());
3987       locations->SetOut(Location::SameAsFirstInput());
3988       // Register to use to perform a long subtract to set cc.
3989       locations->AddTemp(Location::RequiresRegister());
3990       break;
3991     case DataType::Type::kFloat32:
3992       locations->SetInAt(0, Location::RequiresFpuRegister());
3993       locations->SetInAt(1, Location::RequiresFpuRegister());
3994       locations->SetOut(Location::SameAsFirstInput());
3995       locations->AddTemp(Location::RequiresRegister());
3996       break;
3997     case DataType::Type::kFloat64:
3998       locations->SetInAt(0, Location::RequiresFpuRegister());
3999       locations->SetInAt(1, Location::RequiresFpuRegister());
4000       locations->SetOut(Location::SameAsFirstInput());
4001       break;
4002     default:
4003       LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4004   }
4005 }
4006 
GenerateMinMaxInt(LocationSummary * locations,bool is_min,DataType::Type type)4007 void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations,
4008                                                     bool is_min,
4009                                                     DataType::Type type) {
4010   Location op1_loc = locations->InAt(0);
4011   Location op2_loc = locations->InAt(1);
4012 
4013   // Shortcut for same input locations.
4014   if (op1_loc.Equals(op2_loc)) {
4015     // Can return immediately, as op1_loc == out_loc.
4016     // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4017     //       a copy here.
4018     DCHECK(locations->Out().Equals(op1_loc));
4019     return;
4020   }
4021 
4022   if (type == DataType::Type::kInt64) {
4023     // Need to perform a subtract to get the sign right.
4024     // op1 is already in the same location as the output.
4025     Location output = locations->Out();
4026     Register output_lo = output.AsRegisterPairLow<Register>();
4027     Register output_hi = output.AsRegisterPairHigh<Register>();
4028 
4029     Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
4030     Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
4031 
4032     // The comparison is performed by subtracting the second operand from
4033     // the first operand and then setting the status flags in the same
4034     // manner as the SUB instruction."
4035     __ cmpl(output_lo, op2_lo);
4036 
4037     // Now use a temp and the borrow to finish the subtraction of op2_hi.
4038     Register temp = locations->GetTemp(0).AsRegister<Register>();
4039     __ movl(temp, output_hi);
4040     __ sbbl(temp, op2_hi);
4041 
4042     // Now the condition code is correct.
4043     Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
4044     __ cmovl(cond, output_lo, op2_lo);
4045     __ cmovl(cond, output_hi, op2_hi);
4046   } else {
4047     DCHECK_EQ(type, DataType::Type::kInt32);
4048     Register out = locations->Out().AsRegister<Register>();
4049     Register op2 = op2_loc.AsRegister<Register>();
4050 
4051     //  (out := op1)
4052     //  out <=? op2
4053     //  if out is min jmp done
4054     //  out := op2
4055     // done:
4056 
4057     __ cmpl(out, op2);
4058     Condition cond = is_min ? Condition::kGreater : Condition::kLess;
4059     __ cmovl(cond, out, op2);
4060   }
4061 }
4062 
GenerateMinMaxFP(LocationSummary * locations,bool is_min,DataType::Type type)4063 void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations,
4064                                                    bool is_min,
4065                                                    DataType::Type type) {
4066   Location op1_loc = locations->InAt(0);
4067   Location op2_loc = locations->InAt(1);
4068   Location out_loc = locations->Out();
4069   XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4070 
4071   // Shortcut for same input locations.
4072   if (op1_loc.Equals(op2_loc)) {
4073     DCHECK(out_loc.Equals(op1_loc));
4074     return;
4075   }
4076 
4077   //  (out := op1)
4078   //  out <=? op2
4079   //  if Nan jmp Nan_label
4080   //  if out is min jmp done
4081   //  if op2 is min jmp op2_label
4082   //  handle -0/+0
4083   //  jmp done
4084   // Nan_label:
4085   //  out := NaN
4086   // op2_label:
4087   //  out := op2
4088   // done:
4089   //
4090   // This removes one jmp, but needs to copy one input (op1) to out.
4091   //
4092   // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
4093 
4094   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4095 
4096   NearLabel nan, done, op2_label;
4097   if (type == DataType::Type::kFloat64) {
4098     __ ucomisd(out, op2);
4099   } else {
4100     DCHECK_EQ(type, DataType::Type::kFloat32);
4101     __ ucomiss(out, op2);
4102   }
4103 
4104   __ j(Condition::kParityEven, &nan);
4105 
4106   __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4107   __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4108 
4109   // Handle 0.0/-0.0.
4110   if (is_min) {
4111     if (type == DataType::Type::kFloat64) {
4112       __ orpd(out, op2);
4113     } else {
4114       __ orps(out, op2);
4115     }
4116   } else {
4117     if (type == DataType::Type::kFloat64) {
4118       __ andpd(out, op2);
4119     } else {
4120       __ andps(out, op2);
4121     }
4122   }
4123   __ jmp(&done);
4124 
4125   // NaN handling.
4126   __ Bind(&nan);
4127   if (type == DataType::Type::kFloat64) {
4128     // TODO: Use a constant from the constant table (requires extra input).
4129     __ LoadLongConstant(out, kDoubleNaN);
4130   } else {
4131     Register constant = locations->GetTemp(0).AsRegister<Register>();
4132     __ movl(constant, Immediate(kFloatNaN));
4133     __ movd(out, constant);
4134   }
4135   __ jmp(&done);
4136 
4137   // out := op2;
4138   __ Bind(&op2_label);
4139   if (type == DataType::Type::kFloat64) {
4140     __ movsd(out, op2);
4141   } else {
4142     __ movss(out, op2);
4143   }
4144 
4145   // Done.
4146   __ Bind(&done);
4147 }
4148 
GenerateMinMax(HBinaryOperation * minmax,bool is_min)4149 void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4150   DataType::Type type = minmax->GetResultType();
4151   switch (type) {
4152     case DataType::Type::kInt32:
4153     case DataType::Type::kInt64:
4154       GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4155       break;
4156     case DataType::Type::kFloat32:
4157     case DataType::Type::kFloat64:
4158       GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4159       break;
4160     default:
4161       LOG(FATAL) << "Unexpected type for HMinMax " << type;
4162   }
4163 }
4164 
VisitMin(HMin * min)4165 void LocationsBuilderX86::VisitMin(HMin* min) {
4166   CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4167 }
4168 
VisitMin(HMin * min)4169 void InstructionCodeGeneratorX86::VisitMin(HMin* min) {
4170   GenerateMinMax(min, /*is_min*/ true);
4171 }
4172 
VisitMax(HMax * max)4173 void LocationsBuilderX86::VisitMax(HMax* max) {
4174   CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4175 }
4176 
VisitMax(HMax * max)4177 void InstructionCodeGeneratorX86::VisitMax(HMax* max) {
4178   GenerateMinMax(max, /*is_min*/ false);
4179 }
4180 
VisitAbs(HAbs * abs)4181 void LocationsBuilderX86::VisitAbs(HAbs* abs) {
4182   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4183   switch (abs->GetResultType()) {
4184     case DataType::Type::kInt32:
4185       locations->SetInAt(0, Location::RegisterLocation(EAX));
4186       locations->SetOut(Location::SameAsFirstInput());
4187       locations->AddTemp(Location::RegisterLocation(EDX));
4188       break;
4189     case DataType::Type::kInt64:
4190       locations->SetInAt(0, Location::RequiresRegister());
4191       locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
4192       locations->AddTemp(Location::RequiresRegister());
4193       break;
4194     case DataType::Type::kFloat32:
4195       locations->SetInAt(0, Location::RequiresFpuRegister());
4196       locations->SetOut(Location::SameAsFirstInput());
4197       locations->AddTemp(Location::RequiresFpuRegister());
4198       locations->AddTemp(Location::RequiresRegister());
4199       break;
4200     case DataType::Type::kFloat64:
4201       locations->SetInAt(0, Location::RequiresFpuRegister());
4202       locations->SetOut(Location::SameAsFirstInput());
4203       locations->AddTemp(Location::RequiresFpuRegister());
4204       break;
4205     default:
4206       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4207   }
4208 }
4209 
VisitAbs(HAbs * abs)4210 void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) {
4211   LocationSummary* locations = abs->GetLocations();
4212   switch (abs->GetResultType()) {
4213     case DataType::Type::kInt32: {
4214       Register out = locations->Out().AsRegister<Register>();
4215       DCHECK_EQ(out, EAX);
4216       Register temp = locations->GetTemp(0).AsRegister<Register>();
4217       DCHECK_EQ(temp, EDX);
4218       // Sign extend EAX into EDX.
4219       __ cdq();
4220       // XOR EAX with sign.
4221       __ xorl(EAX, EDX);
4222       // Subtract out sign to correct.
4223       __ subl(EAX, EDX);
4224       // The result is in EAX.
4225       break;
4226     }
4227     case DataType::Type::kInt64: {
4228       Location input = locations->InAt(0);
4229       Register input_lo = input.AsRegisterPairLow<Register>();
4230       Register input_hi = input.AsRegisterPairHigh<Register>();
4231       Location output = locations->Out();
4232       Register output_lo = output.AsRegisterPairLow<Register>();
4233       Register output_hi = output.AsRegisterPairHigh<Register>();
4234       Register temp = locations->GetTemp(0).AsRegister<Register>();
4235       // Compute the sign into the temporary.
4236       __ movl(temp, input_hi);
4237       __ sarl(temp, Immediate(31));
4238       // Store the sign into the output.
4239       __ movl(output_lo, temp);
4240       __ movl(output_hi, temp);
4241       // XOR the input to the output.
4242       __ xorl(output_lo, input_lo);
4243       __ xorl(output_hi, input_hi);
4244       // Subtract the sign.
4245       __ subl(output_lo, temp);
4246       __ sbbl(output_hi, temp);
4247       break;
4248     }
4249     case DataType::Type::kFloat32: {
4250       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4251       XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4252       Register constant = locations->GetTemp(1).AsRegister<Register>();
4253       __ movl(constant, Immediate(INT32_C(0x7FFFFFFF)));
4254       __ movd(temp, constant);
4255       __ andps(out, temp);
4256       break;
4257     }
4258     case DataType::Type::kFloat64: {
4259       XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4260       XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4261       // TODO: Use a constant from the constant table (requires extra input).
4262       __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF));
4263       __ andpd(out, temp);
4264       break;
4265     }
4266     default:
4267       LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4268   }
4269 }
4270 
VisitDivZeroCheck(HDivZeroCheck * instruction)4271 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4272   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4273   switch (instruction->GetType()) {
4274     case DataType::Type::kBool:
4275     case DataType::Type::kUint8:
4276     case DataType::Type::kInt8:
4277     case DataType::Type::kUint16:
4278     case DataType::Type::kInt16:
4279     case DataType::Type::kInt32: {
4280       locations->SetInAt(0, Location::Any());
4281       break;
4282     }
4283     case DataType::Type::kInt64: {
4284       locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4285       if (!instruction->IsConstant()) {
4286         locations->AddTemp(Location::RequiresRegister());
4287       }
4288       break;
4289     }
4290     default:
4291       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4292   }
4293 }
4294 
VisitDivZeroCheck(HDivZeroCheck * instruction)4295 void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4296   SlowPathCode* slow_path =
4297       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86(instruction);
4298   codegen_->AddSlowPath(slow_path);
4299 
4300   LocationSummary* locations = instruction->GetLocations();
4301   Location value = locations->InAt(0);
4302 
4303   switch (instruction->GetType()) {
4304     case DataType::Type::kBool:
4305     case DataType::Type::kUint8:
4306     case DataType::Type::kInt8:
4307     case DataType::Type::kUint16:
4308     case DataType::Type::kInt16:
4309     case DataType::Type::kInt32: {
4310       if (value.IsRegister()) {
4311         __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
4312         __ j(kEqual, slow_path->GetEntryLabel());
4313       } else if (value.IsStackSlot()) {
4314         __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
4315         __ j(kEqual, slow_path->GetEntryLabel());
4316       } else {
4317         DCHECK(value.IsConstant()) << value;
4318         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4319           __ jmp(slow_path->GetEntryLabel());
4320         }
4321       }
4322       break;
4323     }
4324     case DataType::Type::kInt64: {
4325       if (value.IsRegisterPair()) {
4326         Register temp = locations->GetTemp(0).AsRegister<Register>();
4327         __ movl(temp, value.AsRegisterPairLow<Register>());
4328         __ orl(temp, value.AsRegisterPairHigh<Register>());
4329         __ j(kEqual, slow_path->GetEntryLabel());
4330       } else {
4331         DCHECK(value.IsConstant()) << value;
4332         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4333           __ jmp(slow_path->GetEntryLabel());
4334         }
4335       }
4336       break;
4337     }
4338     default:
4339       LOG(FATAL) << "Unexpected type for HDivZeroCheck" << instruction->GetType();
4340   }
4341 }
4342 
HandleShift(HBinaryOperation * op)4343 void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
4344   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4345 
4346   LocationSummary* locations =
4347       new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4348 
4349   switch (op->GetResultType()) {
4350     case DataType::Type::kInt32:
4351     case DataType::Type::kInt64: {
4352       // Can't have Location::Any() and output SameAsFirstInput()
4353       locations->SetInAt(0, Location::RequiresRegister());
4354       // The shift count needs to be in CL or a constant.
4355       locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
4356       locations->SetOut(Location::SameAsFirstInput());
4357       break;
4358     }
4359     default:
4360       LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4361   }
4362 }
4363 
HandleShift(HBinaryOperation * op)4364 void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
4365   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4366 
4367   LocationSummary* locations = op->GetLocations();
4368   Location first = locations->InAt(0);
4369   Location second = locations->InAt(1);
4370   DCHECK(first.Equals(locations->Out()));
4371 
4372   switch (op->GetResultType()) {
4373     case DataType::Type::kInt32: {
4374       DCHECK(first.IsRegister());
4375       Register first_reg = first.AsRegister<Register>();
4376       if (second.IsRegister()) {
4377         Register second_reg = second.AsRegister<Register>();
4378         DCHECK_EQ(ECX, second_reg);
4379         if (op->IsShl()) {
4380           __ shll(first_reg, second_reg);
4381         } else if (op->IsShr()) {
4382           __ sarl(first_reg, second_reg);
4383         } else {
4384           __ shrl(first_reg, second_reg);
4385         }
4386       } else {
4387         int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance;
4388         if (shift == 0) {
4389           return;
4390         }
4391         Immediate imm(shift);
4392         if (op->IsShl()) {
4393           __ shll(first_reg, imm);
4394         } else if (op->IsShr()) {
4395           __ sarl(first_reg, imm);
4396         } else {
4397           __ shrl(first_reg, imm);
4398         }
4399       }
4400       break;
4401     }
4402     case DataType::Type::kInt64: {
4403       if (second.IsRegister()) {
4404         Register second_reg = second.AsRegister<Register>();
4405         DCHECK_EQ(ECX, second_reg);
4406         if (op->IsShl()) {
4407           GenerateShlLong(first, second_reg);
4408         } else if (op->IsShr()) {
4409           GenerateShrLong(first, second_reg);
4410         } else {
4411           GenerateUShrLong(first, second_reg);
4412         }
4413       } else {
4414         // Shift by a constant.
4415         int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4416         // Nothing to do if the shift is 0, as the input is already the output.
4417         if (shift != 0) {
4418           if (op->IsShl()) {
4419             GenerateShlLong(first, shift);
4420           } else if (op->IsShr()) {
4421             GenerateShrLong(first, shift);
4422           } else {
4423             GenerateUShrLong(first, shift);
4424           }
4425         }
4426       }
4427       break;
4428     }
4429     default:
4430       LOG(FATAL) << "Unexpected op type " << op->GetResultType();
4431   }
4432 }
4433 
GenerateShlLong(const Location & loc,int shift)4434 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
4435   Register low = loc.AsRegisterPairLow<Register>();
4436   Register high = loc.AsRegisterPairHigh<Register>();
4437   if (shift == 1) {
4438     // This is just an addition.
4439     __ addl(low, low);
4440     __ adcl(high, high);
4441   } else if (shift == 32) {
4442     // Shift by 32 is easy. High gets low, and low gets 0.
4443     codegen_->EmitParallelMoves(
4444         loc.ToLow(),
4445         loc.ToHigh(),
4446         DataType::Type::kInt32,
4447         Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4448         loc.ToLow(),
4449         DataType::Type::kInt32);
4450   } else if (shift > 32) {
4451     // Low part becomes 0.  High part is low part << (shift-32).
4452     __ movl(high, low);
4453     __ shll(high, Immediate(shift - 32));
4454     __ xorl(low, low);
4455   } else {
4456     // Between 1 and 31.
4457     __ shld(high, low, Immediate(shift));
4458     __ shll(low, Immediate(shift));
4459   }
4460 }
4461 
GenerateShlLong(const Location & loc,Register shifter)4462 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
4463   NearLabel done;
4464   __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
4465   __ shll(loc.AsRegisterPairLow<Register>(), shifter);
4466   __ testl(shifter, Immediate(32));
4467   __ j(kEqual, &done);
4468   __ movl(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>());
4469   __ movl(loc.AsRegisterPairLow<Register>(), Immediate(0));
4470   __ Bind(&done);
4471 }
4472 
GenerateShrLong(const Location & loc,int shift)4473 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
4474   Register low = loc.AsRegisterPairLow<Register>();
4475   Register high = loc.AsRegisterPairHigh<Register>();
4476   if (shift == 32) {
4477     // Need to copy the sign.
4478     DCHECK_NE(low, high);
4479     __ movl(low, high);
4480     __ sarl(high, Immediate(31));
4481   } else if (shift > 32) {
4482     DCHECK_NE(low, high);
4483     // High part becomes sign. Low part is shifted by shift - 32.
4484     __ movl(low, high);
4485     __ sarl(high, Immediate(31));
4486     __ sarl(low, Immediate(shift - 32));
4487   } else {
4488     // Between 1 and 31.
4489     __ shrd(low, high, Immediate(shift));
4490     __ sarl(high, Immediate(shift));
4491   }
4492 }
4493 
GenerateShrLong(const Location & loc,Register shifter)4494 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
4495   NearLabel done;
4496   __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4497   __ sarl(loc.AsRegisterPairHigh<Register>(), shifter);
4498   __ testl(shifter, Immediate(32));
4499   __ j(kEqual, &done);
4500   __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4501   __ sarl(loc.AsRegisterPairHigh<Register>(), Immediate(31));
4502   __ Bind(&done);
4503 }
4504 
GenerateUShrLong(const Location & loc,int shift)4505 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
4506   Register low = loc.AsRegisterPairLow<Register>();
4507   Register high = loc.AsRegisterPairHigh<Register>();
4508   if (shift == 32) {
4509     // Shift by 32 is easy. Low gets high, and high gets 0.
4510     codegen_->EmitParallelMoves(
4511         loc.ToHigh(),
4512         loc.ToLow(),
4513         DataType::Type::kInt32,
4514         Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
4515         loc.ToHigh(),
4516         DataType::Type::kInt32);
4517   } else if (shift > 32) {
4518     // Low part is high >> (shift - 32). High part becomes 0.
4519     __ movl(low, high);
4520     __ shrl(low, Immediate(shift - 32));
4521     __ xorl(high, high);
4522   } else {
4523     // Between 1 and 31.
4524     __ shrd(low, high, Immediate(shift));
4525     __ shrl(high, Immediate(shift));
4526   }
4527 }
4528 
GenerateUShrLong(const Location & loc,Register shifter)4529 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
4530   NearLabel done;
4531   __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
4532   __ shrl(loc.AsRegisterPairHigh<Register>(), shifter);
4533   __ testl(shifter, Immediate(32));
4534   __ j(kEqual, &done);
4535   __ movl(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>());
4536   __ movl(loc.AsRegisterPairHigh<Register>(), Immediate(0));
4537   __ Bind(&done);
4538 }
4539 
VisitRor(HRor * ror)4540 void LocationsBuilderX86::VisitRor(HRor* ror) {
4541   LocationSummary* locations =
4542       new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4543 
4544   switch (ror->GetResultType()) {
4545     case DataType::Type::kInt64:
4546       // Add the temporary needed.
4547       locations->AddTemp(Location::RequiresRegister());
4548       FALLTHROUGH_INTENDED;
4549     case DataType::Type::kInt32:
4550       locations->SetInAt(0, Location::RequiresRegister());
4551       // The shift count needs to be in CL (unless it is a constant).
4552       locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
4553       locations->SetOut(Location::SameAsFirstInput());
4554       break;
4555     default:
4556       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4557       UNREACHABLE();
4558   }
4559 }
4560 
VisitRor(HRor * ror)4561 void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
4562   LocationSummary* locations = ror->GetLocations();
4563   Location first = locations->InAt(0);
4564   Location second = locations->InAt(1);
4565 
4566   if (ror->GetResultType() == DataType::Type::kInt32) {
4567     Register first_reg = first.AsRegister<Register>();
4568     if (second.IsRegister()) {
4569       Register second_reg = second.AsRegister<Register>();
4570       __ rorl(first_reg, second_reg);
4571     } else {
4572       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4573       __ rorl(first_reg, imm);
4574     }
4575     return;
4576   }
4577 
4578   DCHECK_EQ(ror->GetResultType(), DataType::Type::kInt64);
4579   Register first_reg_lo = first.AsRegisterPairLow<Register>();
4580   Register first_reg_hi = first.AsRegisterPairHigh<Register>();
4581   Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
4582   if (second.IsRegister()) {
4583     Register second_reg = second.AsRegister<Register>();
4584     DCHECK_EQ(second_reg, ECX);
4585     __ movl(temp_reg, first_reg_hi);
4586     __ shrd(first_reg_hi, first_reg_lo, second_reg);
4587     __ shrd(first_reg_lo, temp_reg, second_reg);
4588     __ movl(temp_reg, first_reg_hi);
4589     __ testl(second_reg, Immediate(32));
4590     __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
4591     __ cmovl(kNotEqual, first_reg_lo, temp_reg);
4592   } else {
4593     int32_t shift_amt = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance;
4594     if (shift_amt == 0) {
4595       // Already fine.
4596       return;
4597     }
4598     if (shift_amt == 32) {
4599       // Just swap.
4600       __ movl(temp_reg, first_reg_lo);
4601       __ movl(first_reg_lo, first_reg_hi);
4602       __ movl(first_reg_hi, temp_reg);
4603       return;
4604     }
4605 
4606     Immediate imm(shift_amt);
4607     // Save the constents of the low value.
4608     __ movl(temp_reg, first_reg_lo);
4609 
4610     // Shift right into low, feeding bits from high.
4611     __ shrd(first_reg_lo, first_reg_hi, imm);
4612 
4613     // Shift right into high, feeding bits from the original low.
4614     __ shrd(first_reg_hi, temp_reg, imm);
4615 
4616     // Swap if needed.
4617     if (shift_amt > 32) {
4618       __ movl(temp_reg, first_reg_lo);
4619       __ movl(first_reg_lo, first_reg_hi);
4620       __ movl(first_reg_hi, temp_reg);
4621     }
4622   }
4623 }
4624 
VisitShl(HShl * shl)4625 void LocationsBuilderX86::VisitShl(HShl* shl) {
4626   HandleShift(shl);
4627 }
4628 
VisitShl(HShl * shl)4629 void InstructionCodeGeneratorX86::VisitShl(HShl* shl) {
4630   HandleShift(shl);
4631 }
4632 
VisitShr(HShr * shr)4633 void LocationsBuilderX86::VisitShr(HShr* shr) {
4634   HandleShift(shr);
4635 }
4636 
VisitShr(HShr * shr)4637 void InstructionCodeGeneratorX86::VisitShr(HShr* shr) {
4638   HandleShift(shr);
4639 }
4640 
VisitUShr(HUShr * ushr)4641 void LocationsBuilderX86::VisitUShr(HUShr* ushr) {
4642   HandleShift(ushr);
4643 }
4644 
VisitUShr(HUShr * ushr)4645 void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) {
4646   HandleShift(ushr);
4647 }
4648 
VisitNewInstance(HNewInstance * instruction)4649 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
4650   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4651       instruction, LocationSummary::kCallOnMainOnly);
4652   locations->SetOut(Location::RegisterLocation(EAX));
4653   InvokeRuntimeCallingConvention calling_convention;
4654   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4655 }
4656 
VisitNewInstance(HNewInstance * instruction)4657 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
4658   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4659   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4660   DCHECK(!codegen_->IsLeafMethod());
4661 }
4662 
VisitNewArray(HNewArray * instruction)4663 void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
4664   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4665       instruction, LocationSummary::kCallOnMainOnly);
4666   locations->SetOut(Location::RegisterLocation(EAX));
4667   InvokeRuntimeCallingConvention calling_convention;
4668   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4669   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4670 }
4671 
VisitNewArray(HNewArray * instruction)4672 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
4673   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
4674   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
4675   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4676   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4677   DCHECK(!codegen_->IsLeafMethod());
4678 }
4679 
VisitParameterValue(HParameterValue * instruction)4680 void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
4681   LocationSummary* locations =
4682       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4683   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4684   if (location.IsStackSlot()) {
4685     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4686   } else if (location.IsDoubleStackSlot()) {
4687     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4688   }
4689   locations->SetOut(location);
4690 }
4691 
VisitParameterValue(HParameterValue * instruction ATTRIBUTE_UNUSED)4692 void InstructionCodeGeneratorX86::VisitParameterValue(
4693     HParameterValue* instruction ATTRIBUTE_UNUSED) {
4694 }
4695 
VisitCurrentMethod(HCurrentMethod * instruction)4696 void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
4697   LocationSummary* locations =
4698       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4699   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4700 }
4701 
VisitCurrentMethod(HCurrentMethod * instruction ATTRIBUTE_UNUSED)4702 void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4703 }
4704 
VisitClassTableGet(HClassTableGet * instruction)4705 void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
4706   LocationSummary* locations =
4707       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4708   locations->SetInAt(0, Location::RequiresRegister());
4709   locations->SetOut(Location::RequiresRegister());
4710 }
4711 
VisitClassTableGet(HClassTableGet * instruction)4712 void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
4713   LocationSummary* locations = instruction->GetLocations();
4714   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4715     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4716         instruction->GetIndex(), kX86PointerSize).SizeValue();
4717     __ movl(locations->Out().AsRegister<Register>(),
4718             Address(locations->InAt(0).AsRegister<Register>(), method_offset));
4719   } else {
4720     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4721         instruction->GetIndex(), kX86PointerSize));
4722     __ movl(locations->Out().AsRegister<Register>(),
4723             Address(locations->InAt(0).AsRegister<Register>(),
4724                     mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
4725     // temp = temp->GetImtEntryAt(method_offset);
4726     __ movl(locations->Out().AsRegister<Register>(),
4727             Address(locations->Out().AsRegister<Register>(), method_offset));
4728   }
4729 }
4730 
VisitNot(HNot * not_)4731 void LocationsBuilderX86::VisitNot(HNot* not_) {
4732   LocationSummary* locations =
4733       new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
4734   locations->SetInAt(0, Location::RequiresRegister());
4735   locations->SetOut(Location::SameAsFirstInput());
4736 }
4737 
VisitNot(HNot * not_)4738 void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
4739   LocationSummary* locations = not_->GetLocations();
4740   Location in = locations->InAt(0);
4741   Location out = locations->Out();
4742   DCHECK(in.Equals(out));
4743   switch (not_->GetResultType()) {
4744     case DataType::Type::kInt32:
4745       __ notl(out.AsRegister<Register>());
4746       break;
4747 
4748     case DataType::Type::kInt64:
4749       __ notl(out.AsRegisterPairLow<Register>());
4750       __ notl(out.AsRegisterPairHigh<Register>());
4751       break;
4752 
4753     default:
4754       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4755   }
4756 }
4757 
VisitBooleanNot(HBooleanNot * bool_not)4758 void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) {
4759   LocationSummary* locations =
4760       new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
4761   locations->SetInAt(0, Location::RequiresRegister());
4762   locations->SetOut(Location::SameAsFirstInput());
4763 }
4764 
VisitBooleanNot(HBooleanNot * bool_not)4765 void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) {
4766   LocationSummary* locations = bool_not->GetLocations();
4767   Location in = locations->InAt(0);
4768   Location out = locations->Out();
4769   DCHECK(in.Equals(out));
4770   __ xorl(out.AsRegister<Register>(), Immediate(1));
4771 }
4772 
VisitCompare(HCompare * compare)4773 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
4774   LocationSummary* locations =
4775       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
4776   switch (compare->InputAt(0)->GetType()) {
4777     case DataType::Type::kBool:
4778     case DataType::Type::kUint8:
4779     case DataType::Type::kInt8:
4780     case DataType::Type::kUint16:
4781     case DataType::Type::kInt16:
4782     case DataType::Type::kInt32:
4783     case DataType::Type::kInt64: {
4784       locations->SetInAt(0, Location::RequiresRegister());
4785       locations->SetInAt(1, Location::Any());
4786       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4787       break;
4788     }
4789     case DataType::Type::kFloat32:
4790     case DataType::Type::kFloat64: {
4791       locations->SetInAt(0, Location::RequiresFpuRegister());
4792       if (compare->InputAt(1)->IsX86LoadFromConstantTable()) {
4793         DCHECK(compare->InputAt(1)->IsEmittedAtUseSite());
4794       } else if (compare->InputAt(1)->IsConstant()) {
4795         locations->SetInAt(1, Location::RequiresFpuRegister());
4796       } else {
4797         locations->SetInAt(1, Location::Any());
4798       }
4799       locations->SetOut(Location::RequiresRegister());
4800       break;
4801     }
4802     default:
4803       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
4804   }
4805 }
4806 
VisitCompare(HCompare * compare)4807 void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
4808   LocationSummary* locations = compare->GetLocations();
4809   Register out = locations->Out().AsRegister<Register>();
4810   Location left = locations->InAt(0);
4811   Location right = locations->InAt(1);
4812 
4813   NearLabel less, greater, done;
4814   Condition less_cond = kLess;
4815 
4816   switch (compare->InputAt(0)->GetType()) {
4817     case DataType::Type::kBool:
4818     case DataType::Type::kUint8:
4819     case DataType::Type::kInt8:
4820     case DataType::Type::kUint16:
4821     case DataType::Type::kInt16:
4822     case DataType::Type::kInt32: {
4823       codegen_->GenerateIntCompare(left, right);
4824       break;
4825     }
4826     case DataType::Type::kInt64: {
4827       Register left_low = left.AsRegisterPairLow<Register>();
4828       Register left_high = left.AsRegisterPairHigh<Register>();
4829       int32_t val_low = 0;
4830       int32_t val_high = 0;
4831       bool right_is_const = false;
4832 
4833       if (right.IsConstant()) {
4834         DCHECK(right.GetConstant()->IsLongConstant());
4835         right_is_const = true;
4836         int64_t val = right.GetConstant()->AsLongConstant()->GetValue();
4837         val_low = Low32Bits(val);
4838         val_high = High32Bits(val);
4839       }
4840 
4841       if (right.IsRegisterPair()) {
4842         __ cmpl(left_high, right.AsRegisterPairHigh<Register>());
4843       } else if (right.IsDoubleStackSlot()) {
4844         __ cmpl(left_high, Address(ESP, right.GetHighStackIndex(kX86WordSize)));
4845       } else {
4846         DCHECK(right_is_const) << right;
4847         codegen_->Compare32BitValue(left_high, val_high);
4848       }
4849       __ j(kLess, &less);  // Signed compare.
4850       __ j(kGreater, &greater);  // Signed compare.
4851       if (right.IsRegisterPair()) {
4852         __ cmpl(left_low, right.AsRegisterPairLow<Register>());
4853       } else if (right.IsDoubleStackSlot()) {
4854         __ cmpl(left_low, Address(ESP, right.GetStackIndex()));
4855       } else {
4856         DCHECK(right_is_const) << right;
4857         codegen_->Compare32BitValue(left_low, val_low);
4858       }
4859       less_cond = kBelow;  // for CF (unsigned).
4860       break;
4861     }
4862     case DataType::Type::kFloat32: {
4863       GenerateFPCompare(left, right, compare, false);
4864       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
4865       less_cond = kBelow;  // for CF (floats).
4866       break;
4867     }
4868     case DataType::Type::kFloat64: {
4869       GenerateFPCompare(left, right, compare, true);
4870       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
4871       less_cond = kBelow;  // for CF (floats).
4872       break;
4873     }
4874     default:
4875       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
4876   }
4877 
4878   __ movl(out, Immediate(0));
4879   __ j(kEqual, &done);
4880   __ j(less_cond, &less);
4881 
4882   __ Bind(&greater);
4883   __ movl(out, Immediate(1));
4884   __ jmp(&done);
4885 
4886   __ Bind(&less);
4887   __ movl(out, Immediate(-1));
4888 
4889   __ Bind(&done);
4890 }
4891 
VisitPhi(HPhi * instruction)4892 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
4893   LocationSummary* locations =
4894       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4895   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4896     locations->SetInAt(i, Location::Any());
4897   }
4898   locations->SetOut(Location::Any());
4899 }
4900 
VisitPhi(HPhi * instruction ATTRIBUTE_UNUSED)4901 void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4902   LOG(FATAL) << "Unreachable";
4903 }
4904 
GenerateMemoryBarrier(MemBarrierKind kind)4905 void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
4906   /*
4907    * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
4908    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
4909    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4910    */
4911   switch (kind) {
4912     case MemBarrierKind::kAnyAny: {
4913       MemoryFence();
4914       break;
4915     }
4916     case MemBarrierKind::kAnyStore:
4917     case MemBarrierKind::kLoadAny:
4918     case MemBarrierKind::kStoreStore: {
4919       // nop
4920       break;
4921     }
4922     case MemBarrierKind::kNTStoreStore:
4923       // Non-Temporal Store/Store needs an explicit fence.
4924       MemoryFence(/* non-temporal= */ true);
4925       break;
4926   }
4927 }
4928 
// Returns the requested dispatch info unchanged; `method` is not consulted.
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method ATTRIBUTE_UNUSED) {
  return desired_dispatch_info;
}
4934 
GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect * invoke,Register temp)4935 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
4936                                                                  Register temp) {
4937   DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
4938   Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4939   if (!invoke->GetLocations()->Intrinsified()) {
4940     return location.AsRegister<Register>();
4941   }
4942   // For intrinsics we allow any location, so it may be on the stack.
4943   if (!location.IsRegister()) {
4944     __ movl(temp, Address(ESP, location.GetStackIndex()));
4945     return temp;
4946   }
4947   // For register locations, check if the register was saved. If so, get it from the stack.
4948   // Note: There is a chance that the register was saved but not overwritten, so we could
4949   // save one load. However, since this is just an intrinsic slow path we prefer this
4950   // simple and more robust approach rather that trying to determine if that's the case.
4951   SlowPathCode* slow_path = GetCurrentSlowPath();
4952   DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
4953   if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
4954     int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
4955     __ movl(temp, Address(ESP, stack_offset));
4956     return temp;
4957   }
4958   return location.AsRegister<Register>();
4959 }
4960 
GenerateStaticOrDirectCall(HInvokeStaticOrDirect * invoke,Location temp,SlowPathCode * slow_path)4961 void CodeGeneratorX86::GenerateStaticOrDirectCall(
4962     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
4963   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
4964   switch (invoke->GetMethodLoadKind()) {
4965     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
4966       // temp = thread->string_init_entrypoint
4967       uint32_t offset =
4968           GetThreadOffset<kX86PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
4969       __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(offset));
4970       break;
4971     }
4972     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
4973       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4974       break;
4975     case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
4976       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
4977       Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
4978                                                                 temp.AsRegister<Register>());
4979       __ leal(temp.AsRegister<Register>(), Address(base_reg, CodeGeneratorX86::kDummy32BitOffset));
4980       RecordBootImageMethodPatch(invoke);
4981       break;
4982     }
4983     case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
4984       Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
4985                                                                 temp.AsRegister<Register>());
4986       __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
4987       RecordBootImageRelRoPatch(
4988           invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(),
4989           GetBootImageOffset(invoke));
4990       break;
4991     }
4992     case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
4993       Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
4994                                                                 temp.AsRegister<Register>());
4995       __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
4996       RecordMethodBssEntryPatch(invoke);
4997       // No need for memory fence, thanks to the x86 memory model.
4998       break;
4999     }
5000     case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
5001       __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
5002       break;
5003     case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
5004       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
5005       return;  // No code pointer retrieval; the runtime performs the call directly.
5006     }
5007   }
5008 
5009   switch (invoke->GetCodePtrLocation()) {
5010     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
5011       __ call(GetFrameEntryLabel());
5012       break;
5013     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
5014       // (callee_method + offset_of_quick_compiled_code)()
5015       __ call(Address(callee_method.AsRegister<Register>(),
5016                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
5017                           kX86PointerSize).Int32Value()));
5018       break;
5019   }
5020   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5021 
5022   DCHECK(!IsLeafMethod());
5023 }
5024 
GenerateVirtualCall(HInvokeVirtual * invoke,Location temp_in,SlowPathCode * slow_path)5025 void CodeGeneratorX86::GenerateVirtualCall(
5026     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
5027   Register temp = temp_in.AsRegister<Register>();
5028   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5029       invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
5030 
5031   // Use the calling convention instead of the location of the receiver, as
5032   // intrinsics may have put the receiver in a different register. In the intrinsics
5033   // slow path, the arguments have been moved to the right place, so here we are
5034   // guaranteed that the receiver is the first register of the calling convention.
5035   InvokeDexCallingConvention calling_convention;
5036   Register receiver = calling_convention.GetRegisterAt(0);
5037   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5038   // /* HeapReference<Class> */ temp = receiver->klass_
5039   __ movl(temp, Address(receiver, class_offset));
5040   MaybeRecordImplicitNullCheck(invoke);
5041   // Instead of simply (possibly) unpoisoning `temp` here, we should
5042   // emit a read barrier for the previous class reference load.
5043   // However this is not required in practice, as this is an
5044   // intermediate/temporary reference and because the current
5045   // concurrent copying collector keeps the from-space memory
5046   // intact/accessible until the end of the marking phase (the
5047   // concurrent copying collector may not in the future).
5048   __ MaybeUnpoisonHeapReference(temp);
5049 
5050   MaybeGenerateInlineCacheCheck(invoke, temp);
5051 
5052   // temp = temp->GetMethodAt(method_offset);
5053   __ movl(temp, Address(temp, method_offset));
5054   // call temp->GetEntryPoint();
5055   __ call(Address(
5056       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
5057   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
5058 }
5059 
RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress * method_address,uint32_t intrinsic_data)5060 void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
5061                                                      uint32_t intrinsic_data) {
5062   boot_image_other_patches_.emplace_back(
5063       method_address, /* target_dex_file= */ nullptr, intrinsic_data);
5064   __ Bind(&boot_image_other_patches_.back().label);
5065 }
5066 
RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress * method_address,uint32_t boot_image_offset)5067 void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
5068                                                  uint32_t boot_image_offset) {
5069   boot_image_other_patches_.emplace_back(
5070       method_address, /* target_dex_file= */ nullptr, boot_image_offset);
5071   __ Bind(&boot_image_other_patches_.back().label);
5072 }
5073 
RecordBootImageMethodPatch(HInvokeStaticOrDirect * invoke)5074 void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
5075   DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
5076   HX86ComputeBaseMethodAddress* method_address =
5077       invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5078   boot_image_method_patches_.emplace_back(
5079       method_address, invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
5080   __ Bind(&boot_image_method_patches_.back().label);
5081 }
5082 
RecordMethodBssEntryPatch(HInvokeStaticOrDirect * invoke)5083 void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) {
5084   DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
5085   HX86ComputeBaseMethodAddress* method_address =
5086       invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5087   // Add the patch entry and bind its label at the end of the instruction.
5088   method_bss_entry_patches_.emplace_back(
5089       method_address, &GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
5090   __ Bind(&method_bss_entry_patches_.back().label);
5091 }
5092 
RecordBootImageTypePatch(HLoadClass * load_class)5093 void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) {
5094   HX86ComputeBaseMethodAddress* method_address =
5095       load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5096   boot_image_type_patches_.emplace_back(
5097       method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5098   __ Bind(&boot_image_type_patches_.back().label);
5099 }
5100 
NewTypeBssEntryPatch(HLoadClass * load_class)5101 Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
5102   HX86ComputeBaseMethodAddress* method_address =
5103       load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
5104   type_bss_entry_patches_.emplace_back(
5105       method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
5106   return &type_bss_entry_patches_.back().label;
5107 }
5108 
RecordBootImageStringPatch(HLoadString * load_string)5109 void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) {
5110   HX86ComputeBaseMethodAddress* method_address =
5111       load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5112   boot_image_string_patches_.emplace_back(
5113       method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5114   __ Bind(&boot_image_string_patches_.back().label);
5115 }
5116 
NewStringBssEntryPatch(HLoadString * load_string)5117 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
5118   HX86ComputeBaseMethodAddress* method_address =
5119       load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
5120   string_bss_entry_patches_.emplace_back(
5121       method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_);
5122   return &string_bss_entry_patches_.back().label;
5123 }
5124 
LoadBootImageAddress(Register reg,uint32_t boot_image_reference,HInvokeStaticOrDirect * invoke)5125 void CodeGeneratorX86::LoadBootImageAddress(Register reg,
5126                                             uint32_t boot_image_reference,
5127                                             HInvokeStaticOrDirect* invoke) {
5128   if (GetCompilerOptions().IsBootImage()) {
5129     DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
5130     HX86ComputeBaseMethodAddress* method_address =
5131         invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5132     DCHECK(method_address != nullptr);
5133     Register method_address_reg =
5134         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5135     __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset));
5136     RecordBootImageIntrinsicPatch(method_address, boot_image_reference);
5137   } else if (GetCompilerOptions().GetCompilePic()) {
5138     DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
5139     HX86ComputeBaseMethodAddress* method_address =
5140         invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5141     DCHECK(method_address != nullptr);
5142     Register method_address_reg =
5143         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5144     __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset));
5145     RecordBootImageRelRoPatch(method_address, boot_image_reference);
5146   } else {
5147     DCHECK(Runtime::Current()->UseJitCompilation());
5148     gc::Heap* heap = Runtime::Current()->GetHeap();
5149     DCHECK(!heap->GetBootImageSpaces().empty());
5150     const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
5151     __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
5152   }
5153 }
5154 
AllocateInstanceForIntrinsic(HInvokeStaticOrDirect * invoke,uint32_t boot_image_offset)5155 void CodeGeneratorX86::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
5156                                                     uint32_t boot_image_offset) {
5157   DCHECK(invoke->IsStatic());
5158   InvokeRuntimeCallingConvention calling_convention;
5159   Register argument = calling_convention.GetRegisterAt(0);
5160   if (GetCompilerOptions().IsBootImage()) {
5161     DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
5162     // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
5163     DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
5164     HX86ComputeBaseMethodAddress* method_address =
5165         invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
5166     DCHECK(method_address != nullptr);
5167     Register method_address_reg =
5168         invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
5169     __ leal(argument, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset));
5170     MethodReference target_method = invoke->GetTargetMethod();
5171     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
5172     boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_);
5173     __ Bind(&boot_image_type_patches_.back().label);
5174   } else {
5175     LoadBootImageAddress(argument, boot_image_offset, invoke);
5176   }
5177   InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
5178   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5179 }
5180 
// Patch labels are bound at the end of the "movl" (or other) instruction, whereas the
// literal offset of a method patch must designate the embedded constant, which occupies
// the last 4 bytes of that instruction. This is the distance between the two, i.e. the
// size of the embedded 32-bit constant.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment =
    static_cast<uint32_t>(sizeof(uint32_t));
5184 
5185 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
EmitPcRelativeLinkerPatches(const ArenaDeque<X86PcRelativePatchInfo> & infos,ArenaVector<linker::LinkerPatch> * linker_patches)5186 inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
5187     const ArenaDeque<X86PcRelativePatchInfo>& infos,
5188     ArenaVector<linker::LinkerPatch>* linker_patches) {
5189   for (const X86PcRelativePatchInfo& info : infos) {
5190     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
5191     linker_patches->push_back(Factory(literal_offset,
5192                                       info.target_dex_file,
5193                                       GetMethodAddressOffset(info.method_address),
5194                                       info.offset_or_index));
5195   }
5196 }
5197 
5198 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
NoDexFileAdapter(size_t literal_offset,const DexFile * target_dex_file,uint32_t pc_insn_offset,uint32_t boot_image_offset)5199 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
5200                                      const DexFile* target_dex_file,
5201                                      uint32_t pc_insn_offset,
5202                                      uint32_t boot_image_offset) {
5203   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
5204   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
5205 }
5206 
EmitLinkerPatches(ArenaVector<linker::LinkerPatch> * linker_patches)5207 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
5208   DCHECK(linker_patches->empty());
5209   size_t size =
5210       boot_image_method_patches_.size() +
5211       method_bss_entry_patches_.size() +
5212       boot_image_type_patches_.size() +
5213       type_bss_entry_patches_.size() +
5214       boot_image_string_patches_.size() +
5215       string_bss_entry_patches_.size() +
5216       boot_image_other_patches_.size();
5217   linker_patches->reserve(size);
5218   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
5219     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
5220         boot_image_method_patches_, linker_patches);
5221     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
5222         boot_image_type_patches_, linker_patches);
5223     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
5224         boot_image_string_patches_, linker_patches);
5225   } else {
5226     DCHECK(boot_image_method_patches_.empty());
5227     DCHECK(boot_image_type_patches_.empty());
5228     DCHECK(boot_image_string_patches_.empty());
5229   }
5230   if (GetCompilerOptions().IsBootImage()) {
5231     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
5232         boot_image_other_patches_, linker_patches);
5233   } else {
5234     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
5235         boot_image_other_patches_, linker_patches);
5236   }
5237   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
5238       method_bss_entry_patches_, linker_patches);
5239   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
5240       type_bss_entry_patches_, linker_patches);
5241   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
5242       string_bss_entry_patches_, linker_patches);
5243   DCHECK_EQ(size, linker_patches->size());
5244 }
5245 
MarkGCCard(Register temp,Register card,Register object,Register value,bool value_can_be_null)5246 void CodeGeneratorX86::MarkGCCard(Register temp,
5247                                   Register card,
5248                                   Register object,
5249                                   Register value,
5250                                   bool value_can_be_null) {
5251   NearLabel is_null;
5252   if (value_can_be_null) {
5253     __ testl(value, value);
5254     __ j(kEqual, &is_null);
5255   }
5256   // Load the address of the card table into `card`.
5257   __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
5258   // Calculate the offset (in the card table) of the card corresponding to
5259   // `object`.
5260   __ movl(temp, object);
5261   __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
5262   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5263   // `object`'s card.
5264   //
5265   // Register `card` contains the address of the card table. Note that the card
5266   // table's base is biased during its creation so that it always starts at an
5267   // address whose least-significant byte is equal to `kCardDirty` (see
5268   // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5269   // below writes the `kCardDirty` (byte) value into the `object`'s card
5270   // (located at `card + object >> kCardShift`).
5271   //
5272   // This dual use of the value in register `card` (1. to calculate the location
5273   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5274   // (no need to explicitly load `kCardDirty` as an immediate value).
5275   __ movb(Address(temp, card, TIMES_1, 0),
5276           X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
5277   if (value_can_be_null) {
5278     __ Bind(&is_null);
5279   }
5280 }
5281 
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)5282 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
5283   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5284 
5285   bool object_field_get_with_read_barrier =
5286       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5287   LocationSummary* locations =
5288       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5289                                                        kEmitCompilerReadBarrier
5290                                                            ? LocationSummary::kCallOnSlowPath
5291                                                            : LocationSummary::kNoCall);
5292   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5293     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5294   }
5295   locations->SetInAt(0, Location::RequiresRegister());
5296 
5297   if (DataType::IsFloatingPointType(instruction->GetType())) {
5298     locations->SetOut(Location::RequiresFpuRegister());
5299   } else {
5300     // The output overlaps in case of long: we don't want the low move
5301     // to overwrite the object's location.  Likewise, in the case of
5302     // an object field get with read barriers enabled, we do not want
5303     // the move to overwrite the object's location, as we need it to emit
5304     // the read barrier.
5305     locations->SetOut(
5306         Location::RequiresRegister(),
5307         (object_field_get_with_read_barrier || instruction->GetType() == DataType::Type::kInt64) ?
5308             Location::kOutputOverlap :
5309             Location::kNoOutputOverlap);
5310   }
5311 
5312   if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) {
5313     // Long values can be loaded atomically into an XMM using movsd.
5314     // So we use an XMM register as a temp to achieve atomicity (first
5315     // load the temp into the XMM and then copy the XMM into the
5316     // output, 32 bits at a time).
5317     locations->AddTemp(Location::RequiresFpuRegister());
5318   }
5319 }
5320 
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)5321 void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
5322                                                  const FieldInfo& field_info) {
5323   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5324 
5325   LocationSummary* locations = instruction->GetLocations();
5326   Location base_loc = locations->InAt(0);
5327   Register base = base_loc.AsRegister<Register>();
5328   Location out = locations->Out();
5329   bool is_volatile = field_info.IsVolatile();
5330   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5331   DataType::Type load_type = instruction->GetType();
5332   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5333 
5334   switch (load_type) {
5335     case DataType::Type::kBool:
5336     case DataType::Type::kUint8: {
5337       __ movzxb(out.AsRegister<Register>(), Address(base, offset));
5338       break;
5339     }
5340 
5341     case DataType::Type::kInt8: {
5342       __ movsxb(out.AsRegister<Register>(), Address(base, offset));
5343       break;
5344     }
5345 
5346     case DataType::Type::kUint16: {
5347       __ movzxw(out.AsRegister<Register>(), Address(base, offset));
5348       break;
5349     }
5350 
5351     case DataType::Type::kInt16: {
5352       __ movsxw(out.AsRegister<Register>(), Address(base, offset));
5353       break;
5354     }
5355 
5356     case DataType::Type::kInt32:
5357       __ movl(out.AsRegister<Register>(), Address(base, offset));
5358       break;
5359 
5360     case DataType::Type::kReference: {
5361       // /* HeapReference<Object> */ out = *(base + offset)
5362       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5363         // Note that a potential implicit null check is handled in this
5364         // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
5365         codegen_->GenerateFieldLoadWithBakerReadBarrier(
5366             instruction, out, base, offset, /* needs_null_check= */ true);
5367         if (is_volatile) {
5368           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5369         }
5370       } else {
5371         __ movl(out.AsRegister<Register>(), Address(base, offset));
5372         codegen_->MaybeRecordImplicitNullCheck(instruction);
5373         if (is_volatile) {
5374           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5375         }
5376         // If read barriers are enabled, emit read barriers other than
5377         // Baker's using a slow path (and also unpoison the loaded
5378         // reference, if heap poisoning is enabled).
5379         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5380       }
5381       break;
5382     }
5383 
5384     case DataType::Type::kInt64: {
5385       if (is_volatile) {
5386         XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
5387         __ movsd(temp, Address(base, offset));
5388         codegen_->MaybeRecordImplicitNullCheck(instruction);
5389         __ movd(out.AsRegisterPairLow<Register>(), temp);
5390         __ psrlq(temp, Immediate(32));
5391         __ movd(out.AsRegisterPairHigh<Register>(), temp);
5392       } else {
5393         DCHECK_NE(base, out.AsRegisterPairLow<Register>());
5394         __ movl(out.AsRegisterPairLow<Register>(), Address(base, offset));
5395         codegen_->MaybeRecordImplicitNullCheck(instruction);
5396         __ movl(out.AsRegisterPairHigh<Register>(), Address(base, kX86WordSize + offset));
5397       }
5398       break;
5399     }
5400 
5401     case DataType::Type::kFloat32: {
5402       __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
5403       break;
5404     }
5405 
5406     case DataType::Type::kFloat64: {
5407       __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
5408       break;
5409     }
5410 
5411     case DataType::Type::kUint32:
5412     case DataType::Type::kUint64:
5413     case DataType::Type::kVoid:
5414       LOG(FATAL) << "Unreachable type " << load_type;
5415       UNREACHABLE();
5416   }
5417 
5418   if (load_type == DataType::Type::kReference || load_type == DataType::Type::kInt64) {
5419     // Potential implicit null checks, in the case of reference or
5420     // long fields, are handled in the previous switch statement.
5421   } else {
5422     codegen_->MaybeRecordImplicitNullCheck(instruction);
5423   }
5424 
5425   if (is_volatile) {
5426     if (load_type == DataType::Type::kReference) {
5427       // Memory barriers, in the case of references, are also handled
5428       // in the previous switch statement.
5429     } else {
5430       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5431     }
5432   }
5433 }
5434 
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info)5435 void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) {
5436   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5437 
5438   LocationSummary* locations =
5439       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5440   locations->SetInAt(0, Location::RequiresRegister());
5441   bool is_volatile = field_info.IsVolatile();
5442   DataType::Type field_type = field_info.GetFieldType();
5443   bool is_byte_type = DataType::Size(field_type) == 1u;
5444 
5445   // The register allocator does not support multiple
5446   // inputs that die at entry with one in a specific register.
5447   if (is_byte_type) {
5448     // Ensure the value is in a byte register.
5449     locations->SetInAt(1, Location::RegisterLocation(EAX));
5450   } else if (DataType::IsFloatingPointType(field_type)) {
5451     if (is_volatile && field_type == DataType::Type::kFloat64) {
5452       // In order to satisfy the semantics of volatile, this must be a single instruction store.
5453       locations->SetInAt(1, Location::RequiresFpuRegister());
5454     } else {
5455       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5456     }
5457   } else if (is_volatile && field_type == DataType::Type::kInt64) {
5458     // In order to satisfy the semantics of volatile, this must be a single instruction store.
5459     locations->SetInAt(1, Location::RequiresRegister());
5460 
5461     // 64bits value can be atomically written to an address with movsd and an XMM register.
5462     // We need two XMM registers because there's no easier way to (bit) copy a register pair
5463     // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
5464     // NB: We could make the register allocator understand fp_reg <-> core_reg moves but given the
5465     // isolated cases when we need this it isn't worth adding the extra complexity.
5466     locations->AddTemp(Location::RequiresFpuRegister());
5467     locations->AddTemp(Location::RequiresFpuRegister());
5468   } else {
5469     locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5470 
5471     if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
5472       // Temporary registers for the write barrier.
5473       locations->AddTemp(Location::RequiresRegister());  // May be used for reference poisoning too.
5474       // Ensure the card is in a byte register.
5475       locations->AddTemp(Location::RegisterLocation(ECX));
5476     }
5477   }
5478 }
5479 
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info,bool value_can_be_null)5480 void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
5481                                                  const FieldInfo& field_info,
5482                                                  bool value_can_be_null) {
5483   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5484 
5485   LocationSummary* locations = instruction->GetLocations();
5486   Register base = locations->InAt(0).AsRegister<Register>();
5487   Location value = locations->InAt(1);
5488   bool is_volatile = field_info.IsVolatile();
5489   DataType::Type field_type = field_info.GetFieldType();
5490   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5491   bool needs_write_barrier =
5492       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
5493 
5494   if (is_volatile) {
5495     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5496   }
5497 
5498   bool maybe_record_implicit_null_check_done = false;
5499 
5500   switch (field_type) {
5501     case DataType::Type::kBool:
5502     case DataType::Type::kUint8:
5503     case DataType::Type::kInt8: {
5504       __ movb(Address(base, offset), value.AsRegister<ByteRegister>());
5505       break;
5506     }
5507 
5508     case DataType::Type::kUint16:
5509     case DataType::Type::kInt16: {
5510       if (value.IsConstant()) {
5511         __ movw(Address(base, offset),
5512                 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5513       } else {
5514         __ movw(Address(base, offset), value.AsRegister<Register>());
5515       }
5516       break;
5517     }
5518 
5519     case DataType::Type::kInt32:
5520     case DataType::Type::kReference: {
5521       if (kPoisonHeapReferences && needs_write_barrier) {
5522         // Note that in the case where `value` is a null reference,
5523         // we do not enter this block, as the reference does not
5524         // need poisoning.
5525         DCHECK_EQ(field_type, DataType::Type::kReference);
5526         Register temp = locations->GetTemp(0).AsRegister<Register>();
5527         __ movl(temp, value.AsRegister<Register>());
5528         __ PoisonHeapReference(temp);
5529         __ movl(Address(base, offset), temp);
5530       } else if (value.IsConstant()) {
5531         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5532         __ movl(Address(base, offset), Immediate(v));
5533       } else {
5534         DCHECK(value.IsRegister()) << value;
5535         __ movl(Address(base, offset), value.AsRegister<Register>());
5536       }
5537       break;
5538     }
5539 
5540     case DataType::Type::kInt64: {
5541       if (is_volatile) {
5542         XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
5543         XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
5544         __ movd(temp1, value.AsRegisterPairLow<Register>());
5545         __ movd(temp2, value.AsRegisterPairHigh<Register>());
5546         __ punpckldq(temp1, temp2);
5547         __ movsd(Address(base, offset), temp1);
5548         codegen_->MaybeRecordImplicitNullCheck(instruction);
5549       } else if (value.IsConstant()) {
5550         int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5551         __ movl(Address(base, offset), Immediate(Low32Bits(v)));
5552         codegen_->MaybeRecordImplicitNullCheck(instruction);
5553         __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
5554       } else {
5555         __ movl(Address(base, offset), value.AsRegisterPairLow<Register>());
5556         codegen_->MaybeRecordImplicitNullCheck(instruction);
5557         __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>());
5558       }
5559       maybe_record_implicit_null_check_done = true;
5560       break;
5561     }
5562 
5563     case DataType::Type::kFloat32: {
5564       if (value.IsConstant()) {
5565         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5566         __ movl(Address(base, offset), Immediate(v));
5567       } else {
5568         __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5569       }
5570       break;
5571     }
5572 
5573     case DataType::Type::kFloat64: {
5574       if (value.IsConstant()) {
5575         int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5576         __ movl(Address(base, offset), Immediate(Low32Bits(v)));
5577         codegen_->MaybeRecordImplicitNullCheck(instruction);
5578         __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
5579         maybe_record_implicit_null_check_done = true;
5580       } else {
5581         __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
5582       }
5583       break;
5584     }
5585 
5586     case DataType::Type::kUint32:
5587     case DataType::Type::kUint64:
5588     case DataType::Type::kVoid:
5589       LOG(FATAL) << "Unreachable type " << field_type;
5590       UNREACHABLE();
5591   }
5592 
5593   if (!maybe_record_implicit_null_check_done) {
5594     codegen_->MaybeRecordImplicitNullCheck(instruction);
5595   }
5596 
5597   if (needs_write_barrier) {
5598     Register temp = locations->GetTemp(0).AsRegister<Register>();
5599     Register card = locations->GetTemp(1).AsRegister<Register>();
5600     codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>(), value_can_be_null);
5601   }
5602 
5603   if (is_volatile) {
5604     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5605   }
5606 }
5607 
VisitStaticFieldGet(HStaticFieldGet * instruction)5608 void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5609   HandleFieldGet(instruction, instruction->GetFieldInfo());
5610 }
5611 
VisitStaticFieldGet(HStaticFieldGet * instruction)5612 void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5613   HandleFieldGet(instruction, instruction->GetFieldInfo());
5614 }
5615 
VisitStaticFieldSet(HStaticFieldSet * instruction)5616 void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5617   HandleFieldSet(instruction, instruction->GetFieldInfo());
5618 }
5619 
VisitStaticFieldSet(HStaticFieldSet * instruction)5620 void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5621   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5622 }
5623 
VisitInstanceFieldSet(HInstanceFieldSet * instruction)5624 void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5625   HandleFieldSet(instruction, instruction->GetFieldInfo());
5626 }
5627 
VisitInstanceFieldSet(HInstanceFieldSet * instruction)5628 void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5629   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5630 }
5631 
VisitInstanceFieldGet(HInstanceFieldGet * instruction)5632 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5633   HandleFieldGet(instruction, instruction->GetFieldInfo());
5634 }
5635 
VisitInstanceFieldGet(HInstanceFieldGet * instruction)5636 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5637   HandleFieldGet(instruction, instruction->GetFieldInfo());
5638 }
5639 
VisitStringBuilderAppend(HStringBuilderAppend * instruction)5640 void LocationsBuilderX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5641   codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(EAX));
5642 }
5643 
VisitStringBuilderAppend(HStringBuilderAppend * instruction)5644 void InstructionCodeGeneratorX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5645   __ movl(EAX, Immediate(instruction->GetFormat()->GetValue()));
5646   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
5647 }
5648 
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)5649 void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
5650     HUnresolvedInstanceFieldGet* instruction) {
5651   FieldAccessCallingConventionX86 calling_convention;
5652   codegen_->CreateUnresolvedFieldLocationSummary(
5653       instruction, instruction->GetFieldType(), calling_convention);
5654 }
5655 
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)5656 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet(
5657     HUnresolvedInstanceFieldGet* instruction) {
5658   FieldAccessCallingConventionX86 calling_convention;
5659   codegen_->GenerateUnresolvedFieldAccess(instruction,
5660                                           instruction->GetFieldType(),
5661                                           instruction->GetFieldIndex(),
5662                                           instruction->GetDexPc(),
5663                                           calling_convention);
5664 }
5665 
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)5666 void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet(
5667     HUnresolvedInstanceFieldSet* instruction) {
5668   FieldAccessCallingConventionX86 calling_convention;
5669   codegen_->CreateUnresolvedFieldLocationSummary(
5670       instruction, instruction->GetFieldType(), calling_convention);
5671 }
5672 
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)5673 void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet(
5674     HUnresolvedInstanceFieldSet* instruction) {
5675   FieldAccessCallingConventionX86 calling_convention;
5676   codegen_->GenerateUnresolvedFieldAccess(instruction,
5677                                           instruction->GetFieldType(),
5678                                           instruction->GetFieldIndex(),
5679                                           instruction->GetDexPc(),
5680                                           calling_convention);
5681 }
5682 
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)5683 void LocationsBuilderX86::VisitUnresolvedStaticFieldGet(
5684     HUnresolvedStaticFieldGet* instruction) {
5685   FieldAccessCallingConventionX86 calling_convention;
5686   codegen_->CreateUnresolvedFieldLocationSummary(
5687       instruction, instruction->GetFieldType(), calling_convention);
5688 }
5689 
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)5690 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet(
5691     HUnresolvedStaticFieldGet* instruction) {
5692   FieldAccessCallingConventionX86 calling_convention;
5693   codegen_->GenerateUnresolvedFieldAccess(instruction,
5694                                           instruction->GetFieldType(),
5695                                           instruction->GetFieldIndex(),
5696                                           instruction->GetDexPc(),
5697                                           calling_convention);
5698 }
5699 
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)5700 void LocationsBuilderX86::VisitUnresolvedStaticFieldSet(
5701     HUnresolvedStaticFieldSet* instruction) {
5702   FieldAccessCallingConventionX86 calling_convention;
5703   codegen_->CreateUnresolvedFieldLocationSummary(
5704       instruction, instruction->GetFieldType(), calling_convention);
5705 }
5706 
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)5707 void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet(
5708     HUnresolvedStaticFieldSet* instruction) {
5709   FieldAccessCallingConventionX86 calling_convention;
5710   codegen_->GenerateUnresolvedFieldAccess(instruction,
5711                                           instruction->GetFieldType(),
5712                                           instruction->GetFieldIndex(),
5713                                           instruction->GetDexPc(),
5714                                           calling_convention);
5715 }
5716 
VisitNullCheck(HNullCheck * instruction)5717 void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) {
5718   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5719   Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5720       ? Location::RequiresRegister()
5721       : Location::Any();
5722   locations->SetInAt(0, loc);
5723 }
5724 
GenerateImplicitNullCheck(HNullCheck * instruction)5725 void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) {
5726   if (CanMoveNullCheckToUser(instruction)) {
5727     return;
5728   }
5729   LocationSummary* locations = instruction->GetLocations();
5730   Location obj = locations->InAt(0);
5731 
5732   __ testl(EAX, Address(obj.AsRegister<Register>(), 0));
5733   RecordPcInfo(instruction, instruction->GetDexPc());
5734 }
5735 
GenerateExplicitNullCheck(HNullCheck * instruction)5736 void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) {
5737   SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86(instruction);
5738   AddSlowPath(slow_path);
5739 
5740   LocationSummary* locations = instruction->GetLocations();
5741   Location obj = locations->InAt(0);
5742 
5743   if (obj.IsRegister()) {
5744     __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
5745   } else if (obj.IsStackSlot()) {
5746     __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
5747   } else {
5748     DCHECK(obj.IsConstant()) << obj;
5749     DCHECK(obj.GetConstant()->IsNullConstant());
5750     __ jmp(slow_path->GetEntryLabel());
5751     return;
5752   }
5753   __ j(kEqual, slow_path->GetEntryLabel());
5754 }
5755 
VisitNullCheck(HNullCheck * instruction)5756 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
5757   codegen_->GenerateNullCheck(instruction);
5758 }
5759 
VisitArrayGet(HArrayGet * instruction)5760 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
5761   bool object_array_get_with_read_barrier =
5762       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5763   LocationSummary* locations =
5764       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5765                                                        object_array_get_with_read_barrier
5766                                                            ? LocationSummary::kCallOnSlowPath
5767                                                            : LocationSummary::kNoCall);
5768   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5769     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5770   }
5771   locations->SetInAt(0, Location::RequiresRegister());
5772   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5773   if (DataType::IsFloatingPointType(instruction->GetType())) {
5774     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5775   } else {
5776     // The output overlaps in case of long: we don't want the low move
5777     // to overwrite the array's location.  Likewise, in the case of an
5778     // object array get with read barriers enabled, we do not want the
5779     // move to overwrite the array's location, as we need it to emit
5780     // the read barrier.
5781     locations->SetOut(
5782         Location::RequiresRegister(),
5783         (instruction->GetType() == DataType::Type::kInt64 || object_array_get_with_read_barrier)
5784             ? Location::kOutputOverlap
5785             : Location::kNoOutputOverlap);
5786   }
5787 }
5788 
VisitArrayGet(HArrayGet * instruction)5789 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
5790   LocationSummary* locations = instruction->GetLocations();
5791   Location obj_loc = locations->InAt(0);
5792   Register obj = obj_loc.AsRegister<Register>();
5793   Location index = locations->InAt(1);
5794   Location out_loc = locations->Out();
5795   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5796 
5797   DataType::Type type = instruction->GetType();
5798   switch (type) {
5799     case DataType::Type::kBool:
5800     case DataType::Type::kUint8: {
5801       Register out = out_loc.AsRegister<Register>();
5802       __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5803       break;
5804     }
5805 
5806     case DataType::Type::kInt8: {
5807       Register out = out_loc.AsRegister<Register>();
5808       __ movsxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5809       break;
5810     }
5811 
5812     case DataType::Type::kUint16: {
5813       Register out = out_loc.AsRegister<Register>();
5814       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5815         // Branch cases into compressed and uncompressed for each index's type.
5816         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5817         NearLabel done, not_compressed;
5818         __ testb(Address(obj, count_offset), Immediate(1));
5819         codegen_->MaybeRecordImplicitNullCheck(instruction);
5820         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5821                       "Expecting 0=compressed, 1=uncompressed");
5822         __ j(kNotZero, &not_compressed);
5823         __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
5824         __ jmp(&done);
5825         __ Bind(&not_compressed);
5826         __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5827         __ Bind(&done);
5828       } else {
5829         // Common case for charAt of array of char or when string compression's
5830         // feature is turned off.
5831         __ movzxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5832       }
5833       break;
5834     }
5835 
5836     case DataType::Type::kInt16: {
5837       Register out = out_loc.AsRegister<Register>();
5838       __ movsxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset));
5839       break;
5840     }
5841 
5842     case DataType::Type::kInt32: {
5843       Register out = out_loc.AsRegister<Register>();
5844       __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5845       break;
5846     }
5847 
5848     case DataType::Type::kReference: {
5849       static_assert(
5850           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5851           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5852       // /* HeapReference<Object> */ out =
5853       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
5854       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5855         // Note that a potential implicit null check is handled in this
5856         // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
5857         codegen_->GenerateArrayLoadWithBakerReadBarrier(
5858             instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5859       } else {
5860         Register out = out_loc.AsRegister<Register>();
5861         __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5862         codegen_->MaybeRecordImplicitNullCheck(instruction);
5863         // If read barriers are enabled, emit read barriers other than
5864         // Baker's using a slow path (and also unpoison the loaded
5865         // reference, if heap poisoning is enabled).
5866         if (index.IsConstant()) {
5867           uint32_t offset =
5868               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5869           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5870         } else {
5871           codegen_->MaybeGenerateReadBarrierSlow(
5872               instruction, out_loc, out_loc, obj_loc, data_offset, index);
5873         }
5874       }
5875       break;
5876     }
5877 
5878     case DataType::Type::kInt64: {
5879       DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>());
5880       __ movl(out_loc.AsRegisterPairLow<Register>(),
5881               CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
5882       codegen_->MaybeRecordImplicitNullCheck(instruction);
5883       __ movl(out_loc.AsRegisterPairHigh<Register>(),
5884               CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset + kX86WordSize));
5885       break;
5886     }
5887 
5888     case DataType::Type::kFloat32: {
5889       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5890       __ movss(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
5891       break;
5892     }
5893 
5894     case DataType::Type::kFloat64: {
5895       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5896       __ movsd(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset));
5897       break;
5898     }
5899 
5900     case DataType::Type::kUint32:
5901     case DataType::Type::kUint64:
5902     case DataType::Type::kVoid:
5903       LOG(FATAL) << "Unreachable type " << type;
5904       UNREACHABLE();
5905   }
5906 
5907   if (type == DataType::Type::kReference || type == DataType::Type::kInt64) {
5908     // Potential implicit null checks, in the case of reference or
5909     // long arrays, are handled in the previous switch statement.
5910   } else {
5911     codegen_->MaybeRecordImplicitNullCheck(instruction);
5912   }
5913 }
5914 
VisitArraySet(HArraySet * instruction)5915 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
5916   DataType::Type value_type = instruction->GetComponentType();
5917 
5918   bool needs_write_barrier =
5919       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5920   bool needs_type_check = instruction->NeedsTypeCheck();
5921 
5922   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5923       instruction,
5924       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
5925 
5926   bool is_byte_type = DataType::Size(value_type) == 1u;
5927   // We need the inputs to be different than the output in case of long operation.
5928   // In case of a byte operation, the register allocator does not support multiple
5929   // inputs that die at entry with one in a specific register.
5930   locations->SetInAt(0, Location::RequiresRegister());
5931   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5932   if (is_byte_type) {
5933     // Ensure the value is in a byte register.
5934     locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
5935   } else if (DataType::IsFloatingPointType(value_type)) {
5936     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5937   } else {
5938     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5939   }
5940   if (needs_write_barrier) {
5941     // Temporary registers for the write barrier.
5942     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
5943     // Ensure the card is in a byte register.
5944     locations->AddTemp(Location::RegisterLocation(ECX));
5945   }
5946 }
5947 
VisitArraySet(HArraySet * instruction)5948 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
5949   LocationSummary* locations = instruction->GetLocations();
5950   Location array_loc = locations->InAt(0);
5951   Register array = array_loc.AsRegister<Register>();
5952   Location index = locations->InAt(1);
5953   Location value = locations->InAt(2);
5954   DataType::Type value_type = instruction->GetComponentType();
5955   bool needs_type_check = instruction->NeedsTypeCheck();
5956   bool needs_write_barrier =
5957       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5958 
5959   switch (value_type) {
5960     case DataType::Type::kBool:
5961     case DataType::Type::kUint8:
5962     case DataType::Type::kInt8: {
5963       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5964       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset);
5965       if (value.IsRegister()) {
5966         __ movb(address, value.AsRegister<ByteRegister>());
5967       } else {
5968         __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5969       }
5970       codegen_->MaybeRecordImplicitNullCheck(instruction);
5971       break;
5972     }
5973 
5974     case DataType::Type::kUint16:
5975     case DataType::Type::kInt16: {
5976       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5977       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset);
5978       if (value.IsRegister()) {
5979         __ movw(address, value.AsRegister<Register>());
5980       } else {
5981         __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5982       }
5983       codegen_->MaybeRecordImplicitNullCheck(instruction);
5984       break;
5985     }
5986 
5987     case DataType::Type::kReference: {
5988       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5989       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
5990 
5991       if (!value.IsRegister()) {
5992         // Just setting null.
5993         DCHECK(instruction->InputAt(2)->IsNullConstant());
5994         DCHECK(value.IsConstant()) << value;
5995         __ movl(address, Immediate(0));
5996         codegen_->MaybeRecordImplicitNullCheck(instruction);
5997         DCHECK(!needs_write_barrier);
5998         DCHECK(!needs_type_check);
5999         break;
6000       }
6001 
6002       DCHECK(needs_write_barrier);
6003       Register register_value = value.AsRegister<Register>();
6004       Location temp_loc = locations->GetTemp(0);
6005       Register temp = temp_loc.AsRegister<Register>();
6006 
6007       bool can_value_be_null = instruction->GetValueCanBeNull();
6008       NearLabel do_store;
6009       if (can_value_be_null) {
6010         __ testl(register_value, register_value);
6011         __ j(kEqual, &do_store);
6012       }
6013 
6014       SlowPathCode* slow_path = nullptr;
6015       if (needs_type_check) {
6016         slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction);
6017         codegen_->AddSlowPath(slow_path);
6018 
6019         const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6020         const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6021         const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6022 
6023         // Note that when Baker read barriers are enabled, the type
6024         // checks are performed without read barriers.  This is fine,
6025         // even in the case where a class object is in the from-space
6026         // after the flip, as a comparison involving such a type would
6027         // not produce a false positive; it may of course produce a
6028         // false negative, in which case we would take the ArraySet
6029         // slow path.
6030 
6031         // /* HeapReference<Class> */ temp = array->klass_
6032         __ movl(temp, Address(array, class_offset));
6033         codegen_->MaybeRecordImplicitNullCheck(instruction);
6034         __ MaybeUnpoisonHeapReference(temp);
6035 
6036         // /* HeapReference<Class> */ temp = temp->component_type_
6037         __ movl(temp, Address(temp, component_offset));
6038         // If heap poisoning is enabled, no need to unpoison `temp`
6039         // nor the object reference in `register_value->klass`, as
6040         // we are comparing two poisoned references.
6041         __ cmpl(temp, Address(register_value, class_offset));
6042 
6043         if (instruction->StaticTypeOfArrayIsObjectArray()) {
6044           NearLabel do_put;
6045           __ j(kEqual, &do_put);
6046           // If heap poisoning is enabled, the `temp` reference has
6047           // not been unpoisoned yet; unpoison it now.
6048           __ MaybeUnpoisonHeapReference(temp);
6049 
6050           // If heap poisoning is enabled, no need to unpoison the
6051           // heap reference loaded below, as it is only used for a
6052           // comparison with null.
6053           __ cmpl(Address(temp, super_offset), Immediate(0));
6054           __ j(kNotEqual, slow_path->GetEntryLabel());
6055           __ Bind(&do_put);
6056         } else {
6057           __ j(kNotEqual, slow_path->GetEntryLabel());
6058         }
6059       }
6060 
6061       Register card = locations->GetTemp(1).AsRegister<Register>();
6062       codegen_->MarkGCCard(
6063           temp, card, array, value.AsRegister<Register>(), /* value_can_be_null= */ false);
6064 
6065       if (can_value_be_null) {
6066         DCHECK(do_store.IsLinked());
6067         __ Bind(&do_store);
6068       }
6069 
6070       Register source = register_value;
6071       if (kPoisonHeapReferences) {
6072         __ movl(temp, register_value);
6073         __ PoisonHeapReference(temp);
6074         source = temp;
6075       }
6076 
6077       __ movl(address, source);
6078 
6079       if (can_value_be_null || !needs_type_check) {
6080         codegen_->MaybeRecordImplicitNullCheck(instruction);
6081       }
6082 
6083       if (slow_path != nullptr) {
6084         __ Bind(slow_path->GetExitLabel());
6085       }
6086 
6087       break;
6088     }
6089 
6090     case DataType::Type::kInt32: {
6091       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6092       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6093       if (value.IsRegister()) {
6094         __ movl(address, value.AsRegister<Register>());
6095       } else {
6096         DCHECK(value.IsConstant()) << value;
6097         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6098         __ movl(address, Immediate(v));
6099       }
6100       codegen_->MaybeRecordImplicitNullCheck(instruction);
6101       break;
6102     }
6103 
6104     case DataType::Type::kInt64: {
6105       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6106       if (value.IsRegisterPair()) {
6107         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6108                 value.AsRegisterPairLow<Register>());
6109         codegen_->MaybeRecordImplicitNullCheck(instruction);
6110         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6111                 value.AsRegisterPairHigh<Register>());
6112       } else {
6113         DCHECK(value.IsConstant());
6114         int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
6115         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset),
6116                 Immediate(Low32Bits(val)));
6117         codegen_->MaybeRecordImplicitNullCheck(instruction);
6118         __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset + kX86WordSize),
6119                 Immediate(High32Bits(val)));
6120       }
6121       break;
6122     }
6123 
6124     case DataType::Type::kFloat32: {
6125       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6126       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset);
6127       if (value.IsFpuRegister()) {
6128         __ movss(address, value.AsFpuRegister<XmmRegister>());
6129       } else {
6130         DCHECK(value.IsConstant());
6131         int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6132         __ movl(address, Immediate(v));
6133       }
6134       codegen_->MaybeRecordImplicitNullCheck(instruction);
6135       break;
6136     }
6137 
6138     case DataType::Type::kFloat64: {
6139       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6140       Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset);
6141       if (value.IsFpuRegister()) {
6142         __ movsd(address, value.AsFpuRegister<XmmRegister>());
6143       } else {
6144         DCHECK(value.IsConstant());
6145         Address address_hi =
6146             CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset + kX86WordSize);
6147         int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6148         __ movl(address, Immediate(Low32Bits(v)));
6149         codegen_->MaybeRecordImplicitNullCheck(instruction);
6150         __ movl(address_hi, Immediate(High32Bits(v)));
6151       }
6152       break;
6153     }
6154 
6155     case DataType::Type::kUint32:
6156     case DataType::Type::kUint64:
6157     case DataType::Type::kVoid:
6158       LOG(FATAL) << "Unreachable type " << instruction->GetType();
6159       UNREACHABLE();
6160   }
6161 }
6162 
VisitArrayLength(HArrayLength * instruction)6163 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
6164   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6165   locations->SetInAt(0, Location::RequiresRegister());
6166   if (!instruction->IsEmittedAtUseSite()) {
6167     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6168   }
6169 }
6170 
VisitArrayLength(HArrayLength * instruction)6171 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
6172   if (instruction->IsEmittedAtUseSite()) {
6173     return;
6174   }
6175 
6176   LocationSummary* locations = instruction->GetLocations();
6177   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6178   Register obj = locations->InAt(0).AsRegister<Register>();
6179   Register out = locations->Out().AsRegister<Register>();
6180   __ movl(out, Address(obj, offset));
6181   codegen_->MaybeRecordImplicitNullCheck(instruction);
6182   // Mask out most significant bit in case the array is String's array of char.
6183   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6184     __ shrl(out, Immediate(1));
6185   }
6186 }
6187 
VisitBoundsCheck(HBoundsCheck * instruction)6188 void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6189   RegisterSet caller_saves = RegisterSet::Empty();
6190   InvokeRuntimeCallingConvention calling_convention;
6191   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6192   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6193   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6194   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6195   HInstruction* length = instruction->InputAt(1);
6196   if (!length->IsEmittedAtUseSite()) {
6197     locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
6198   }
6199   // Need register to see array's length.
6200   if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6201     locations->AddTemp(Location::RequiresRegister());
6202   }
6203 }
6204 
VisitBoundsCheck(HBoundsCheck * instruction)6205 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
6206   const bool is_string_compressed_char_at =
6207       mirror::kUseStringCompression && instruction->IsStringCharAt();
6208   LocationSummary* locations = instruction->GetLocations();
6209   Location index_loc = locations->InAt(0);
6210   Location length_loc = locations->InAt(1);
6211   SlowPathCode* slow_path =
6212     new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86(instruction);
6213 
6214   if (length_loc.IsConstant()) {
6215     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6216     if (index_loc.IsConstant()) {
6217       // BCE will remove the bounds check if we are guarenteed to pass.
6218       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6219       if (index < 0 || index >= length) {
6220         codegen_->AddSlowPath(slow_path);
6221         __ jmp(slow_path->GetEntryLabel());
6222       } else {
6223         // Some optimization after BCE may have generated this, and we should not
6224         // generate a bounds check if it is a valid range.
6225       }
6226       return;
6227     }
6228 
6229     // We have to reverse the jump condition because the length is the constant.
6230     Register index_reg = index_loc.AsRegister<Register>();
6231     __ cmpl(index_reg, Immediate(length));
6232     codegen_->AddSlowPath(slow_path);
6233     __ j(kAboveEqual, slow_path->GetEntryLabel());
6234   } else {
6235     HInstruction* array_length = instruction->InputAt(1);
6236     if (array_length->IsEmittedAtUseSite()) {
6237       // Address the length field in the array.
6238       DCHECK(array_length->IsArrayLength());
6239       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6240       Location array_loc = array_length->GetLocations()->InAt(0);
6241       Address array_len(array_loc.AsRegister<Register>(), len_offset);
6242       if (is_string_compressed_char_at) {
6243         // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6244         // the string compression flag) with the in-memory length and avoid the temporary.
6245         Register length_reg = locations->GetTemp(0).AsRegister<Register>();
6246         __ movl(length_reg, array_len);
6247         codegen_->MaybeRecordImplicitNullCheck(array_length);
6248         __ shrl(length_reg, Immediate(1));
6249         codegen_->GenerateIntCompare(length_reg, index_loc);
6250       } else {
6251         // Checking bounds for general case:
6252         // Array of char or string's array with feature compression off.
6253         if (index_loc.IsConstant()) {
6254           int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6255           __ cmpl(array_len, Immediate(value));
6256         } else {
6257           __ cmpl(array_len, index_loc.AsRegister<Register>());
6258         }
6259         codegen_->MaybeRecordImplicitNullCheck(array_length);
6260       }
6261     } else {
6262       codegen_->GenerateIntCompare(length_loc, index_loc);
6263     }
6264     codegen_->AddSlowPath(slow_path);
6265     __ j(kBelowEqual, slow_path->GetEntryLabel());
6266   }
6267 }
6268 
VisitParallelMove(HParallelMove * instruction ATTRIBUTE_UNUSED)6269 void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
6270   LOG(FATAL) << "Unreachable";
6271 }
6272 
VisitParallelMove(HParallelMove * instruction)6273 void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) {
6274   if (instruction->GetNext()->IsSuspendCheck() &&
6275       instruction->GetBlock()->GetLoopInformation() != nullptr) {
6276     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6277     // The back edge will generate the suspend check.
6278     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6279   }
6280 
6281   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6282 }
6283 
VisitSuspendCheck(HSuspendCheck * instruction)6284 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6285   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6286       instruction, LocationSummary::kCallOnSlowPath);
6287   // In suspend check slow path, usually there are no caller-save registers at all.
6288   // If SIMD instructions are present, however, we force spilling all live SIMD
6289   // registers in full width (since the runtime only saves/restores lower part).
6290   locations->SetCustomSlowPathCallerSaves(
6291       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6292 }
6293 
VisitSuspendCheck(HSuspendCheck * instruction)6294 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
6295   HBasicBlock* block = instruction->GetBlock();
6296   if (block->GetLoopInformation() != nullptr) {
6297     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6298     // The back edge will generate the suspend check.
6299     return;
6300   }
6301   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6302     // The goto will generate the suspend check.
6303     return;
6304   }
6305   GenerateSuspendCheck(instruction, nullptr);
6306 }
6307 
GenerateSuspendCheck(HSuspendCheck * instruction,HBasicBlock * successor)6308 void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
6309                                                        HBasicBlock* successor) {
6310   SuspendCheckSlowPathX86* slow_path =
6311       down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
6312   if (slow_path == nullptr) {
6313     slow_path =
6314         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86(instruction, successor);
6315     instruction->SetSlowPath(slow_path);
6316     codegen_->AddSlowPath(slow_path);
6317     if (successor != nullptr) {
6318       DCHECK(successor->IsLoopHeader());
6319     }
6320   } else {
6321     DCHECK_EQ(slow_path->GetSuccessor(), successor);
6322   }
6323 
6324   __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
6325                 Immediate(0));
6326   if (successor == nullptr) {
6327     __ j(kNotEqual, slow_path->GetEntryLabel());
6328     __ Bind(slow_path->GetReturnLabel());
6329   } else {
6330     __ j(kEqual, codegen_->GetLabelOf(successor));
6331     __ jmp(slow_path->GetEntryLabel());
6332   }
6333 }
6334 
GetAssembler() const6335 X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
6336   return codegen_->GetAssembler();
6337 }
6338 
MoveMemoryToMemory(int dst,int src,int number_of_words)6339 void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) {
6340   ScratchRegisterScope ensure_scratch(
6341       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6342   Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
6343   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6344 
6345   // Now that temp register is available (possibly spilled), move blocks of memory.
6346   for (int i = 0; i < number_of_words; i++) {
6347     __ movl(temp_reg, Address(ESP, src + stack_offset));
6348     __ movl(Address(ESP, dst + stack_offset), temp_reg);
6349     stack_offset += kX86WordSize;
6350   }
6351 }
6352 
EmitMove(size_t index)6353 void ParallelMoveResolverX86::EmitMove(size_t index) {
6354   MoveOperands* move = moves_[index];
6355   Location source = move->GetSource();
6356   Location destination = move->GetDestination();
6357 
6358   if (source.IsRegister()) {
6359     if (destination.IsRegister()) {
6360       __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6361     } else if (destination.IsFpuRegister()) {
6362       __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
6363     } else {
6364       DCHECK(destination.IsStackSlot());
6365       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
6366     }
6367   } else if (source.IsRegisterPair()) {
6368       size_t elem_size = DataType::Size(DataType::Type::kInt32);
6369       // Create stack space for 2 elements.
6370       __ subl(ESP, Immediate(2 * elem_size));
6371       __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>());
6372       __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>());
6373       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
6374       // And remove the temporary stack space we allocated.
6375       __ addl(ESP, Immediate(2 * elem_size));
6376   } else if (source.IsFpuRegister()) {
6377     if (destination.IsRegister()) {
6378       __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
6379     } else if (destination.IsFpuRegister()) {
6380       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6381     } else if (destination.IsRegisterPair()) {
6382       XmmRegister src_reg = source.AsFpuRegister<XmmRegister>();
6383       __ movd(destination.AsRegisterPairLow<Register>(), src_reg);
6384       __ psrlq(src_reg, Immediate(32));
6385       __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
6386     } else if (destination.IsStackSlot()) {
6387       __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6388     } else if (destination.IsDoubleStackSlot()) {
6389       __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6390     } else {
6391       DCHECK(destination.IsSIMDStackSlot());
6392       __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
6393     }
6394   } else if (source.IsStackSlot()) {
6395     if (destination.IsRegister()) {
6396       __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
6397     } else if (destination.IsFpuRegister()) {
6398       __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6399     } else {
6400       DCHECK(destination.IsStackSlot());
6401       MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
6402     }
6403   } else if (source.IsDoubleStackSlot()) {
6404     if (destination.IsRegisterPair()) {
6405       __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
6406       __ movl(destination.AsRegisterPairHigh<Register>(),
6407               Address(ESP, source.GetHighStackIndex(kX86WordSize)));
6408     } else if (destination.IsFpuRegister()) {
6409       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6410     } else {
6411       DCHECK(destination.IsDoubleStackSlot()) << destination;
6412       MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
6413     }
6414   } else if (source.IsSIMDStackSlot()) {
6415     if (destination.IsFpuRegister()) {
6416       __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
6417     } else {
6418       DCHECK(destination.IsSIMDStackSlot());
6419       MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
6420     }
6421   } else if (source.IsConstant()) {
6422     HConstant* constant = source.GetConstant();
6423     if (constant->IsIntConstant() || constant->IsNullConstant()) {
6424       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6425       if (destination.IsRegister()) {
6426         if (value == 0) {
6427           __ xorl(destination.AsRegister<Register>(), destination.AsRegister<Register>());
6428         } else {
6429           __ movl(destination.AsRegister<Register>(), Immediate(value));
6430         }
6431       } else {
6432         DCHECK(destination.IsStackSlot()) << destination;
6433         __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
6434       }
6435     } else if (constant->IsFloatConstant()) {
6436       float fp_value = constant->AsFloatConstant()->GetValue();
6437       int32_t value = bit_cast<int32_t, float>(fp_value);
6438       Immediate imm(value);
6439       if (destination.IsFpuRegister()) {
6440         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6441         if (value == 0) {
6442           // Easy handling of 0.0.
6443           __ xorps(dest, dest);
6444         } else {
6445           ScratchRegisterScope ensure_scratch(
6446               this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6447           Register temp = static_cast<Register>(ensure_scratch.GetRegister());
6448           __ movl(temp, Immediate(value));
6449           __ movd(dest, temp);
6450         }
6451       } else {
6452         DCHECK(destination.IsStackSlot()) << destination;
6453         __ movl(Address(ESP, destination.GetStackIndex()), imm);
6454       }
6455     } else if (constant->IsLongConstant()) {
6456       int64_t value = constant->AsLongConstant()->GetValue();
6457       int32_t low_value = Low32Bits(value);
6458       int32_t high_value = High32Bits(value);
6459       Immediate low(low_value);
6460       Immediate high(high_value);
6461       if (destination.IsDoubleStackSlot()) {
6462         __ movl(Address(ESP, destination.GetStackIndex()), low);
6463         __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6464       } else {
6465         __ movl(destination.AsRegisterPairLow<Register>(), low);
6466         __ movl(destination.AsRegisterPairHigh<Register>(), high);
6467       }
6468     } else {
6469       DCHECK(constant->IsDoubleConstant());
6470       double dbl_value = constant->AsDoubleConstant()->GetValue();
6471       int64_t value = bit_cast<int64_t, double>(dbl_value);
6472       int32_t low_value = Low32Bits(value);
6473       int32_t high_value = High32Bits(value);
6474       Immediate low(low_value);
6475       Immediate high(high_value);
6476       if (destination.IsFpuRegister()) {
6477         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6478         if (value == 0) {
6479           // Easy handling of 0.0.
6480           __ xorpd(dest, dest);
6481         } else {
6482           __ pushl(high);
6483           __ pushl(low);
6484           __ movsd(dest, Address(ESP, 0));
6485           __ addl(ESP, Immediate(8));
6486         }
6487       } else {
6488         DCHECK(destination.IsDoubleStackSlot()) << destination;
6489         __ movl(Address(ESP, destination.GetStackIndex()), low);
6490         __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), high);
6491       }
6492     }
6493   } else {
6494     LOG(FATAL) << "Unimplemented move: " << destination << " <- " << source;
6495   }
6496 }
6497 
Exchange(Register reg,int mem)6498 void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
6499   Register suggested_scratch = reg == EAX ? EBX : EAX;
6500   ScratchRegisterScope ensure_scratch(
6501       this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
6502 
6503   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6504   __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
6505   __ movl(Address(ESP, mem + stack_offset), reg);
6506   __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
6507 }
6508 
Exchange32(XmmRegister reg,int mem)6509 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
6510   ScratchRegisterScope ensure_scratch(
6511       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6512 
6513   Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
6514   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
6515   __ movl(temp_reg, Address(ESP, mem + stack_offset));
6516   __ movss(Address(ESP, mem + stack_offset), reg);
6517   __ movd(reg, temp_reg);
6518 }
6519 
Exchange128(XmmRegister reg,int mem)6520 void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
6521   size_t extra_slot = 4 * kX86WordSize;
6522   __ subl(ESP, Immediate(extra_slot));
6523   __ movups(Address(ESP, 0), XmmRegister(reg));
6524   ExchangeMemory(0, mem + extra_slot, 4);
6525   __ movups(XmmRegister(reg), Address(ESP, 0));
6526   __ addl(ESP, Immediate(extra_slot));
6527 }
6528 
ExchangeMemory(int mem1,int mem2,int number_of_words)6529 void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
6530   ScratchRegisterScope ensure_scratch1(
6531       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
6532 
6533   Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
6534   ScratchRegisterScope ensure_scratch2(
6535       this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
6536 
6537   int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
6538   stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
6539 
6540   // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6541   for (int i = 0; i < number_of_words; i++) {
6542     __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
6543     __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
6544     __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
6545     __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
6546     stack_offset += kX86WordSize;
6547   }
6548 }
6549 
EmitSwap(size_t index)6550 void ParallelMoveResolverX86::EmitSwap(size_t index) {
6551   MoveOperands* move = moves_[index];
6552   Location source = move->GetSource();
6553   Location destination = move->GetDestination();
6554 
6555   if (source.IsRegister() && destination.IsRegister()) {
6556     // Use XOR swap algorithm to avoid serializing XCHG instruction or using a temporary.
6557     DCHECK_NE(destination.AsRegister<Register>(), source.AsRegister<Register>());
6558     __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6559     __ xorl(source.AsRegister<Register>(), destination.AsRegister<Register>());
6560     __ xorl(destination.AsRegister<Register>(), source.AsRegister<Register>());
6561   } else if (source.IsRegister() && destination.IsStackSlot()) {
6562     Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
6563   } else if (source.IsStackSlot() && destination.IsRegister()) {
6564     Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
6565   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
6566     ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
6567   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
6568     // Use XOR Swap algorithm to avoid a temporary.
6569     DCHECK_NE(source.reg(), destination.reg());
6570     __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6571     __ xorpd(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
6572     __ xorpd(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6573   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6574     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6575   } else if (destination.IsFpuRegister() && source.IsStackSlot()) {
6576     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6577   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6578     // Take advantage of the 16 bytes in the XMM register.
6579     XmmRegister reg = source.AsFpuRegister<XmmRegister>();
6580     Address stack(ESP, destination.GetStackIndex());
6581     // Load the double into the high doubleword.
6582     __ movhpd(reg, stack);
6583 
6584     // Store the low double into the destination.
6585     __ movsd(stack, reg);
6586 
6587     // Move the high double to the low double.
6588     __ psrldq(reg, Immediate(8));
6589   } else if (destination.IsFpuRegister() && source.IsDoubleStackSlot()) {
6590     // Take advantage of the 16 bytes in the XMM register.
6591     XmmRegister reg = destination.AsFpuRegister<XmmRegister>();
6592     Address stack(ESP, source.GetStackIndex());
6593     // Load the double into the high doubleword.
6594     __ movhpd(reg, stack);
6595 
6596     // Store the low double into the destination.
6597     __ movsd(stack, reg);
6598 
6599     // Move the high double to the low double.
6600     __ psrldq(reg, Immediate(8));
6601   } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
6602     ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
6603   } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
6604     ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
6605   } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
6606     Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6607   } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
6608     Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6609   } else {
6610     LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
6611   }
6612 }
6613 
SpillScratch(int reg)6614 void ParallelMoveResolverX86::SpillScratch(int reg) {
6615   __ pushl(static_cast<Register>(reg));
6616 }
6617 
RestoreScratch(int reg)6618 void ParallelMoveResolverX86::RestoreScratch(int reg) {
6619   __ popl(static_cast<Register>(reg));
6620 }
6621 
GetSupportedLoadClassKind(HLoadClass::LoadKind desired_class_load_kind)6622 HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
6623     HLoadClass::LoadKind desired_class_load_kind) {
6624   switch (desired_class_load_kind) {
6625     case HLoadClass::LoadKind::kInvalid:
6626       LOG(FATAL) << "UNREACHABLE";
6627       UNREACHABLE();
6628     case HLoadClass::LoadKind::kReferrersClass:
6629       break;
6630     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6631     case HLoadClass::LoadKind::kBootImageRelRo:
6632     case HLoadClass::LoadKind::kBssEntry:
6633       DCHECK(!Runtime::Current()->UseJitCompilation());
6634       break;
6635     case HLoadClass::LoadKind::kJitBootImageAddress:
6636     case HLoadClass::LoadKind::kJitTableAddress:
6637       DCHECK(Runtime::Current()->UseJitCompilation());
6638       break;
6639     case HLoadClass::LoadKind::kRuntimeCall:
6640       break;
6641   }
6642   return desired_class_load_kind;
6643 }
6644 
VisitLoadClass(HLoadClass * cls)6645 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
6646   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6647   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6648     InvokeRuntimeCallingConvention calling_convention;
6649     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6650         cls,
6651         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
6652         Location::RegisterLocation(EAX));
6653     DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX);
6654     return;
6655   }
6656   DCHECK(!cls->NeedsAccessCheck());
6657 
6658   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
6659   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6660       ? LocationSummary::kCallOnSlowPath
6661       : LocationSummary::kNoCall;
6662   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
6663   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6664     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
6665   }
6666 
6667   if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
6668       load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
6669       load_kind == HLoadClass::LoadKind::kBootImageRelRo ||
6670       load_kind == HLoadClass::LoadKind::kBssEntry) {
6671     locations->SetInAt(0, Location::RequiresRegister());
6672   }
6673   locations->SetOut(Location::RequiresRegister());
6674   if (load_kind == HLoadClass::LoadKind::kBssEntry) {
6675     if (!kUseReadBarrier || kUseBakerReadBarrier) {
6676       // Rely on the type resolution and/or initialization to save everything.
6677       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6678     } else {
6679       // For non-Baker read barrier we have a temp-clobbering call.
6680     }
6681   }
6682 }
6683 
NewJitRootClassPatch(const DexFile & dex_file,dex::TypeIndex type_index,Handle<mirror::Class> handle)6684 Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
6685                                               dex::TypeIndex type_index,
6686                                               Handle<mirror::Class> handle) {
6687   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6688   // Add a patch entry and return the label.
6689   jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6690   PatchInfo<Label>* info = &jit_class_patches_.back();
6691   return &info->label;
6692 }
6693 
6694 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6695 // move.
VisitLoadClass(HLoadClass * cls)6696 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6697   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6698   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6699     codegen_->GenerateLoadClassRuntimeCall(cls);
6700     return;
6701   }
6702   DCHECK(!cls->NeedsAccessCheck());
6703 
6704   LocationSummary* locations = cls->GetLocations();
6705   Location out_loc = locations->Out();
6706   Register out = out_loc.AsRegister<Register>();
6707 
6708   bool generate_null_check = false;
6709   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
6710       ? kWithoutReadBarrier
6711       : kCompilerReadBarrierOption;
6712   switch (load_kind) {
6713     case HLoadClass::LoadKind::kReferrersClass: {
6714       DCHECK(!cls->CanCallRuntime());
6715       DCHECK(!cls->MustGenerateClinitCheck());
6716       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6717       Register current_method = locations->InAt(0).AsRegister<Register>();
6718       GenerateGcRootFieldLoad(
6719           cls,
6720           out_loc,
6721           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6722           /* fixup_label= */ nullptr,
6723           read_barrier_option);
6724       break;
6725     }
6726     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
6727       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6728              codegen_->GetCompilerOptions().IsBootImageExtension());
6729       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6730       Register method_address = locations->InAt(0).AsRegister<Register>();
6731       __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
6732       codegen_->RecordBootImageTypePatch(cls);
6733       break;
6734     }
6735     case HLoadClass::LoadKind::kBootImageRelRo: {
6736       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6737       Register method_address = locations->InAt(0).AsRegister<Register>();
6738       __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
6739       codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(),
6740                                           codegen_->GetBootImageOffset(cls));
6741       break;
6742     }
6743     case HLoadClass::LoadKind::kBssEntry: {
6744       Register method_address = locations->InAt(0).AsRegister<Register>();
6745       Address address(method_address, CodeGeneratorX86::kDummy32BitOffset);
6746       Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6747       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6748       // No need for memory fence, thanks to the x86 memory model.
6749       generate_null_check = true;
6750       break;
6751     }
6752     case HLoadClass::LoadKind::kJitBootImageAddress: {
6753       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6754       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
6755       DCHECK_NE(address, 0u);
6756       __ movl(out, Immediate(address));
6757       break;
6758     }
6759     case HLoadClass::LoadKind::kJitTableAddress: {
6760       Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset);
6761       Label* fixup_label = codegen_->NewJitRootClassPatch(
6762           cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6763       // /* GcRoot<mirror::Class> */ out = *address
6764       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6765       break;
6766     }
6767     case HLoadClass::LoadKind::kRuntimeCall:
6768     case HLoadClass::LoadKind::kInvalid:
6769       LOG(FATAL) << "UNREACHABLE";
6770       UNREACHABLE();
6771   }
6772 
6773   if (generate_null_check || cls->MustGenerateClinitCheck()) {
6774     DCHECK(cls->CanCallRuntime());
6775     SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls);
6776     codegen_->AddSlowPath(slow_path);
6777 
6778     if (generate_null_check) {
6779       __ testl(out, out);
6780       __ j(kEqual, slow_path->GetEntryLabel());
6781     }
6782 
6783     if (cls->MustGenerateClinitCheck()) {
6784       GenerateClassInitializationCheck(slow_path, out);
6785     } else {
6786       __ Bind(slow_path->GetExitLabel());
6787     }
6788   }
6789 }
6790 
VisitLoadMethodHandle(HLoadMethodHandle * load)6791 void LocationsBuilderX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6792   InvokeRuntimeCallingConvention calling_convention;
6793   Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
6794   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6795 }
6796 
VisitLoadMethodHandle(HLoadMethodHandle * load)6797 void InstructionCodeGeneratorX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6798   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6799 }
6800 
VisitLoadMethodType(HLoadMethodType * load)6801 void LocationsBuilderX86::VisitLoadMethodType(HLoadMethodType* load) {
6802   InvokeRuntimeCallingConvention calling_convention;
6803   Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
6804   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6805 }
6806 
VisitLoadMethodType(HLoadMethodType * load)6807 void InstructionCodeGeneratorX86::VisitLoadMethodType(HLoadMethodType* load) {
6808   codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6809 }
6810 
VisitClinitCheck(HClinitCheck * check)6811 void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
6812   LocationSummary* locations =
6813       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6814   locations->SetInAt(0, Location::RequiresRegister());
6815   if (check->HasUses()) {
6816     locations->SetOut(Location::SameAsFirstInput());
6817   }
6818   // Rely on the type initialization to save everything we need.
6819   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6820 }
6821 
VisitClinitCheck(HClinitCheck * check)6822 void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
6823   // We assume the class to not be null.
6824   SlowPathCode* slow_path =
6825       new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check);
6826   codegen_->AddSlowPath(slow_path);
6827   GenerateClassInitializationCheck(slow_path,
6828                                    check->GetLocations()->InAt(0).AsRegister<Register>());
6829 }
6830 
GenerateClassInitializationCheck(SlowPathCode * slow_path,Register class_reg)6831 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
6832     SlowPathCode* slow_path, Register class_reg) {
6833   constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
6834   const size_t status_byte_offset =
6835       mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
6836   constexpr uint32_t shifted_visibly_initialized_value =
6837       enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
6838 
6839   __ cmpb(Address(class_reg,  status_byte_offset), Immediate(shifted_visibly_initialized_value));
6840   __ j(kBelow, slow_path->GetEntryLabel());
6841   __ Bind(slow_path->GetExitLabel());
6842 }
6843 
GenerateBitstringTypeCheckCompare(HTypeCheckInstruction * check,Register temp)6844 void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
6845                                                                     Register temp) {
6846   uint32_t path_to_root = check->GetBitstringPathToRoot();
6847   uint32_t mask = check->GetBitstringMask();
6848   DCHECK(IsPowerOfTwo(mask + 1));
6849   size_t mask_bits = WhichPowerOf2(mask + 1);
6850 
6851   if (mask_bits == 16u) {
6852     // Compare the bitstring in memory.
6853     __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
6854   } else {
6855     // /* uint32_t */ temp = temp->status_
6856     __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
6857     // Compare the bitstring bits using SUB.
6858     __ subl(temp, Immediate(path_to_root));
6859     // Shift out bits that do not contribute to the comparison.
6860     __ shll(temp, Immediate(32u - mask_bits));
6861   }
6862 }
6863 
GetSupportedLoadStringKind(HLoadString::LoadKind desired_string_load_kind)6864 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
6865     HLoadString::LoadKind desired_string_load_kind) {
6866   switch (desired_string_load_kind) {
6867     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6868     case HLoadString::LoadKind::kBootImageRelRo:
6869     case HLoadString::LoadKind::kBssEntry:
6870       DCHECK(!Runtime::Current()->UseJitCompilation());
6871       break;
6872     case HLoadString::LoadKind::kJitBootImageAddress:
6873     case HLoadString::LoadKind::kJitTableAddress:
6874       DCHECK(Runtime::Current()->UseJitCompilation());
6875       break;
6876     case HLoadString::LoadKind::kRuntimeCall:
6877       break;
6878   }
6879   return desired_string_load_kind;
6880 }
6881 
VisitLoadString(HLoadString * load)6882 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
6883   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
6884   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6885   HLoadString::LoadKind load_kind = load->GetLoadKind();
6886   if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
6887       load_kind == HLoadString::LoadKind::kBootImageRelRo ||
6888       load_kind == HLoadString::LoadKind::kBssEntry) {
6889     locations->SetInAt(0, Location::RequiresRegister());
6890   }
6891   if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
6892     locations->SetOut(Location::RegisterLocation(EAX));
6893   } else {
6894     locations->SetOut(Location::RequiresRegister());
6895     if (load_kind == HLoadString::LoadKind::kBssEntry) {
6896       if (!kUseReadBarrier || kUseBakerReadBarrier) {
6897         // Rely on the pResolveString to save everything.
6898         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6899       } else {
6900         // For non-Baker read barrier we have a temp-clobbering call.
6901       }
6902     }
6903   }
6904 }
6905 
NewJitRootStringPatch(const DexFile & dex_file,dex::StringIndex string_index,Handle<mirror::String> handle)6906 Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file,
6907                                                dex::StringIndex string_index,
6908                                                Handle<mirror::String> handle) {
6909   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
6910   // Add a patch entry and return the label.
6911   jit_string_patches_.emplace_back(&dex_file, string_index.index_);
6912   PatchInfo<Label>* info = &jit_string_patches_.back();
6913   return &info->label;
6914 }
6915 
6916 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6917 // move.
VisitLoadString(HLoadString * load)6918 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6919   LocationSummary* locations = load->GetLocations();
6920   Location out_loc = locations->Out();
6921   Register out = out_loc.AsRegister<Register>();
6922 
6923   switch (load->GetLoadKind()) {
6924     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6925       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6926              codegen_->GetCompilerOptions().IsBootImageExtension());
6927       Register method_address = locations->InAt(0).AsRegister<Register>();
6928       __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
6929       codegen_->RecordBootImageStringPatch(load);
6930       return;
6931     }
6932     case HLoadString::LoadKind::kBootImageRelRo: {
6933       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6934       Register method_address = locations->InAt(0).AsRegister<Register>();
6935       __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
6936       codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(),
6937                                           codegen_->GetBootImageOffset(load));
6938       return;
6939     }
6940     case HLoadString::LoadKind::kBssEntry: {
6941       Register method_address = locations->InAt(0).AsRegister<Register>();
6942       Address address = Address(method_address, CodeGeneratorX86::kDummy32BitOffset);
6943       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
6944       // /* GcRoot<mirror::String> */ out = *address  /* PC-relative */
6945       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6946       // No need for memory fence, thanks to the x86 memory model.
6947       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
6948       codegen_->AddSlowPath(slow_path);
6949       __ testl(out, out);
6950       __ j(kEqual, slow_path->GetEntryLabel());
6951       __ Bind(slow_path->GetExitLabel());
6952       return;
6953     }
6954     case HLoadString::LoadKind::kJitBootImageAddress: {
6955       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
6956       DCHECK_NE(address, 0u);
6957       __ movl(out, Immediate(address));
6958       return;
6959     }
6960     case HLoadString::LoadKind::kJitTableAddress: {
6961       Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset);
6962       Label* fixup_label = codegen_->NewJitRootStringPatch(
6963           load->GetDexFile(), load->GetStringIndex(), load->GetString());
6964       // /* GcRoot<mirror::String> */ out = *address
6965       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6966       return;
6967     }
6968     default:
6969       break;
6970   }
6971 
6972   // TODO: Re-add the compiler code to do string dex cache lookup again.
6973   InvokeRuntimeCallingConvention calling_convention;
6974   DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
6975   __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
6976   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
6977   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6978 }
6979 
GetExceptionTlsAddress()6980 static Address GetExceptionTlsAddress() {
6981   return Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>().Int32Value());
6982 }
6983 
VisitLoadException(HLoadException * load)6984 void LocationsBuilderX86::VisitLoadException(HLoadException* load) {
6985   LocationSummary* locations =
6986       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
6987   locations->SetOut(Location::RequiresRegister());
6988 }
6989 
VisitLoadException(HLoadException * load)6990 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) {
6991   __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), GetExceptionTlsAddress());
6992 }
6993 
VisitClearException(HClearException * clear)6994 void LocationsBuilderX86::VisitClearException(HClearException* clear) {
6995   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
6996 }
6997 
VisitClearException(HClearException * clear ATTRIBUTE_UNUSED)6998 void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
6999   __ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
7000 }
7001 
VisitThrow(HThrow * instruction)7002 void LocationsBuilderX86::VisitThrow(HThrow* instruction) {
7003   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7004       instruction, LocationSummary::kCallOnMainOnly);
7005   InvokeRuntimeCallingConvention calling_convention;
7006   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7007 }
7008 
VisitThrow(HThrow * instruction)7009 void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
7010   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
7011   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7012 }
7013 
7014 // Temp is used for read barrier.
NumberOfInstanceOfTemps(TypeCheckKind type_check_kind)7015 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
7016   if (kEmitCompilerReadBarrier &&
7017       !kUseBakerReadBarrier &&
7018       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7019        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7020        type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7021     return 1;
7022   }
7023   return 0;
7024 }
7025 
7026 // Interface case has 2 temps, one for holding the number of interfaces, one for the current
7027 // interface pointer, the current interface is compared in memory.
7028 // The other checks have one temp for loading the object's class.
NumberOfCheckCastTemps(TypeCheckKind type_check_kind)7029 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
7030   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7031     return 2;
7032   }
7033   return 1 + NumberOfInstanceOfTemps(type_check_kind);
7034 }
7035 
VisitInstanceOf(HInstanceOf * instruction)7036 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
7037   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7038   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7039   bool baker_read_barrier_slow_path = false;
7040   switch (type_check_kind) {
7041     case TypeCheckKind::kExactCheck:
7042     case TypeCheckKind::kAbstractClassCheck:
7043     case TypeCheckKind::kClassHierarchyCheck:
7044     case TypeCheckKind::kArrayObjectCheck: {
7045       bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
7046       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7047       baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
7048       break;
7049     }
7050     case TypeCheckKind::kArrayCheck:
7051     case TypeCheckKind::kUnresolvedCheck:
7052     case TypeCheckKind::kInterfaceCheck:
7053       call_kind = LocationSummary::kCallOnSlowPath;
7054       break;
7055     case TypeCheckKind::kBitstringCheck:
7056       break;
7057   }
7058 
7059   LocationSummary* locations =
7060       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7061   if (baker_read_barrier_slow_path) {
7062     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
7063   }
7064   locations->SetInAt(0, Location::RequiresRegister());
7065   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7066     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7067     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7068     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7069   } else {
7070     locations->SetInAt(1, Location::Any());
7071   }
7072   // Note that TypeCheckSlowPathX86 uses this "out" register too.
7073   locations->SetOut(Location::RequiresRegister());
7074   // When read barriers are enabled, we need a temporary register for some cases.
7075   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
7076 }
7077 
VisitInstanceOf(HInstanceOf * instruction)7078 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
7079   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7080   LocationSummary* locations = instruction->GetLocations();
7081   Location obj_loc = locations->InAt(0);
7082   Register obj = obj_loc.AsRegister<Register>();
7083   Location cls = locations->InAt(1);
7084   Location out_loc = locations->Out();
7085   Register out = out_loc.AsRegister<Register>();
7086   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
7087   DCHECK_LE(num_temps, 1u);
7088   Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
7089   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7090   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7091   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7092   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7093   SlowPathCode* slow_path = nullptr;
7094   NearLabel done, zero;
7095 
7096   // Return 0 if `obj` is null.
7097   // Avoid null check if we know obj is not null.
7098   if (instruction->MustDoNullCheck()) {
7099     __ testl(obj, obj);
7100     __ j(kEqual, &zero);
7101   }
7102 
7103   switch (type_check_kind) {
7104     case TypeCheckKind::kExactCheck: {
7105       ReadBarrierOption read_barrier_option =
7106           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7107       // /* HeapReference<Class> */ out = obj->klass_
7108       GenerateReferenceLoadTwoRegisters(instruction,
7109                                         out_loc,
7110                                         obj_loc,
7111                                         class_offset,
7112                                         read_barrier_option);
7113       if (cls.IsRegister()) {
7114         __ cmpl(out, cls.AsRegister<Register>());
7115       } else {
7116         DCHECK(cls.IsStackSlot()) << cls;
7117         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7118       }
7119 
7120       // Classes must be equal for the instanceof to succeed.
7121       __ j(kNotEqual, &zero);
7122       __ movl(out, Immediate(1));
7123       __ jmp(&done);
7124       break;
7125     }
7126 
7127     case TypeCheckKind::kAbstractClassCheck: {
7128       ReadBarrierOption read_barrier_option =
7129           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7130       // /* HeapReference<Class> */ out = obj->klass_
7131       GenerateReferenceLoadTwoRegisters(instruction,
7132                                         out_loc,
7133                                         obj_loc,
7134                                         class_offset,
7135                                         read_barrier_option);
7136       // If the class is abstract, we eagerly fetch the super class of the
7137       // object to avoid doing a comparison we know will fail.
7138       NearLabel loop;
7139       __ Bind(&loop);
7140       // /* HeapReference<Class> */ out = out->super_class_
7141       GenerateReferenceLoadOneRegister(instruction,
7142                                        out_loc,
7143                                        super_offset,
7144                                        maybe_temp_loc,
7145                                        read_barrier_option);
7146       __ testl(out, out);
7147       // If `out` is null, we use it for the result, and jump to `done`.
7148       __ j(kEqual, &done);
7149       if (cls.IsRegister()) {
7150         __ cmpl(out, cls.AsRegister<Register>());
7151       } else {
7152         DCHECK(cls.IsStackSlot()) << cls;
7153         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7154       }
7155       __ j(kNotEqual, &loop);
7156       __ movl(out, Immediate(1));
7157       if (zero.IsLinked()) {
7158         __ jmp(&done);
7159       }
7160       break;
7161     }
7162 
7163     case TypeCheckKind::kClassHierarchyCheck: {
7164       ReadBarrierOption read_barrier_option =
7165           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7166       // /* HeapReference<Class> */ out = obj->klass_
7167       GenerateReferenceLoadTwoRegisters(instruction,
7168                                         out_loc,
7169                                         obj_loc,
7170                                         class_offset,
7171                                         read_barrier_option);
7172       // Walk over the class hierarchy to find a match.
7173       NearLabel loop, success;
7174       __ Bind(&loop);
7175       if (cls.IsRegister()) {
7176         __ cmpl(out, cls.AsRegister<Register>());
7177       } else {
7178         DCHECK(cls.IsStackSlot()) << cls;
7179         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7180       }
7181       __ j(kEqual, &success);
7182       // /* HeapReference<Class> */ out = out->super_class_
7183       GenerateReferenceLoadOneRegister(instruction,
7184                                        out_loc,
7185                                        super_offset,
7186                                        maybe_temp_loc,
7187                                        read_barrier_option);
7188       __ testl(out, out);
7189       __ j(kNotEqual, &loop);
7190       // If `out` is null, we use it for the result, and jump to `done`.
7191       __ jmp(&done);
7192       __ Bind(&success);
7193       __ movl(out, Immediate(1));
7194       if (zero.IsLinked()) {
7195         __ jmp(&done);
7196       }
7197       break;
7198     }
7199 
7200     case TypeCheckKind::kArrayObjectCheck: {
7201       ReadBarrierOption read_barrier_option =
7202           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
7203       // /* HeapReference<Class> */ out = obj->klass_
7204       GenerateReferenceLoadTwoRegisters(instruction,
7205                                         out_loc,
7206                                         obj_loc,
7207                                         class_offset,
7208                                         read_barrier_option);
7209       // Do an exact check.
7210       NearLabel exact_check;
7211       if (cls.IsRegister()) {
7212         __ cmpl(out, cls.AsRegister<Register>());
7213       } else {
7214         DCHECK(cls.IsStackSlot()) << cls;
7215         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7216       }
7217       __ j(kEqual, &exact_check);
7218       // Otherwise, we need to check that the object's class is a non-primitive array.
7219       // /* HeapReference<Class> */ out = out->component_type_
7220       GenerateReferenceLoadOneRegister(instruction,
7221                                        out_loc,
7222                                        component_offset,
7223                                        maybe_temp_loc,
7224                                        read_barrier_option);
7225       __ testl(out, out);
7226       // If `out` is null, we use it for the result, and jump to `done`.
7227       __ j(kEqual, &done);
7228       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
7229       __ j(kNotEqual, &zero);
7230       __ Bind(&exact_check);
7231       __ movl(out, Immediate(1));
7232       __ jmp(&done);
7233       break;
7234     }
7235 
7236     case TypeCheckKind::kArrayCheck: {
7237       // No read barrier since the slow path will retry upon failure.
7238       // /* HeapReference<Class> */ out = obj->klass_
7239       GenerateReferenceLoadTwoRegisters(instruction,
7240                                         out_loc,
7241                                         obj_loc,
7242                                         class_offset,
7243                                         kWithoutReadBarrier);
7244       if (cls.IsRegister()) {
7245         __ cmpl(out, cls.AsRegister<Register>());
7246       } else {
7247         DCHECK(cls.IsStackSlot()) << cls;
7248         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
7249       }
7250       DCHECK(locations->OnlyCallsOnSlowPath());
7251       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7252           instruction, /* is_fatal= */ false);
7253       codegen_->AddSlowPath(slow_path);
7254       __ j(kNotEqual, slow_path->GetEntryLabel());
7255       __ movl(out, Immediate(1));
7256       if (zero.IsLinked()) {
7257         __ jmp(&done);
7258       }
7259       break;
7260     }
7261 
7262     case TypeCheckKind::kUnresolvedCheck:
7263     case TypeCheckKind::kInterfaceCheck: {
7264       // Note that we indeed only call on slow path, but we always go
7265       // into the slow path for the unresolved and interface check
7266       // cases.
7267       //
7268       // We cannot directly call the InstanceofNonTrivial runtime
7269       // entry point without resorting to a type checking slow path
7270       // here (i.e. by calling InvokeRuntime directly), as it would
7271       // require to assign fixed registers for the inputs of this
7272       // HInstanceOf instruction (following the runtime calling
7273       // convention), which might be cluttered by the potential first
7274       // read barrier emission at the beginning of this method.
7275       //
7276       // TODO: Introduce a new runtime entry point taking the object
7277       // to test (instead of its class) as argument, and let it deal
7278       // with the read barrier issues. This will let us refactor this
7279       // case of the `switch` code as it was previously (with a direct
7280       // call to the runtime not using a type checking slow path).
7281       // This should also be beneficial for the other cases above.
7282       DCHECK(locations->OnlyCallsOnSlowPath());
7283       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7284           instruction, /* is_fatal= */ false);
7285       codegen_->AddSlowPath(slow_path);
7286       __ jmp(slow_path->GetEntryLabel());
7287       if (zero.IsLinked()) {
7288         __ jmp(&done);
7289       }
7290       break;
7291     }
7292 
7293     case TypeCheckKind::kBitstringCheck: {
7294       // /* HeapReference<Class> */ temp = obj->klass_
7295       GenerateReferenceLoadTwoRegisters(instruction,
7296                                         out_loc,
7297                                         obj_loc,
7298                                         class_offset,
7299                                         kWithoutReadBarrier);
7300 
7301       GenerateBitstringTypeCheckCompare(instruction, out);
7302       __ j(kNotEqual, &zero);
7303       __ movl(out, Immediate(1));
7304       __ jmp(&done);
7305       break;
7306     }
7307   }
7308 
7309   if (zero.IsLinked()) {
7310     __ Bind(&zero);
7311     __ xorl(out, out);
7312   }
7313 
7314   if (done.IsLinked()) {
7315     __ Bind(&done);
7316   }
7317 
7318   if (slow_path != nullptr) {
7319     __ Bind(slow_path->GetExitLabel());
7320   }
7321 }
7322 
VisitCheckCast(HCheckCast * instruction)7323 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
7324   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7325   LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
7326   LocationSummary* locations =
7327       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7328   locations->SetInAt(0, Location::RequiresRegister());
7329   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7330     // Require a register for the interface check since there is a loop that compares the class to
7331     // a memory address.
7332     locations->SetInAt(1, Location::RequiresRegister());
7333   } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7334     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7335     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7336     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7337   } else {
7338     locations->SetInAt(1, Location::Any());
7339   }
7340   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86.
7341   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
7342 }
7343 
VisitCheckCast(HCheckCast * instruction)7344 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
7345   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7346   LocationSummary* locations = instruction->GetLocations();
7347   Location obj_loc = locations->InAt(0);
7348   Register obj = obj_loc.AsRegister<Register>();
7349   Location cls = locations->InAt(1);
7350   Location temp_loc = locations->GetTemp(0);
7351   Register temp = temp_loc.AsRegister<Register>();
7352   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
7353   DCHECK_GE(num_temps, 1u);
7354   DCHECK_LE(num_temps, 2u);
7355   Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
7356   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7357   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7358   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7359   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7360   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7361   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7362   const uint32_t object_array_data_offset =
7363       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7364 
7365   bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
7366   SlowPathCode* type_check_slow_path =
7367       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
7368           instruction, is_type_check_slow_path_fatal);
7369   codegen_->AddSlowPath(type_check_slow_path);
7370 
7371   NearLabel done;
7372   // Avoid null check if we know obj is not null.
7373   if (instruction->MustDoNullCheck()) {
7374     __ testl(obj, obj);
7375     __ j(kEqual, &done);
7376   }
7377 
7378   switch (type_check_kind) {
7379     case TypeCheckKind::kExactCheck:
7380     case TypeCheckKind::kArrayCheck: {
7381       // /* HeapReference<Class> */ temp = obj->klass_
7382       GenerateReferenceLoadTwoRegisters(instruction,
7383                                         temp_loc,
7384                                         obj_loc,
7385                                         class_offset,
7386                                         kWithoutReadBarrier);
7387 
7388       if (cls.IsRegister()) {
7389         __ cmpl(temp, cls.AsRegister<Register>());
7390       } else {
7391         DCHECK(cls.IsStackSlot()) << cls;
7392         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7393       }
7394       // Jump to slow path for throwing the exception or doing a
7395       // more involved array check.
7396       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7397       break;
7398     }
7399 
7400     case TypeCheckKind::kAbstractClassCheck: {
7401       // /* HeapReference<Class> */ temp = obj->klass_
7402       GenerateReferenceLoadTwoRegisters(instruction,
7403                                         temp_loc,
7404                                         obj_loc,
7405                                         class_offset,
7406                                         kWithoutReadBarrier);
7407 
7408       // If the class is abstract, we eagerly fetch the super class of the
7409       // object to avoid doing a comparison we know will fail.
7410       NearLabel loop;
7411       __ Bind(&loop);
7412       // /* HeapReference<Class> */ temp = temp->super_class_
7413       GenerateReferenceLoadOneRegister(instruction,
7414                                        temp_loc,
7415                                        super_offset,
7416                                        maybe_temp2_loc,
7417                                        kWithoutReadBarrier);
7418 
7419       // If the class reference currently in `temp` is null, jump to the slow path to throw the
7420       // exception.
7421       __ testl(temp, temp);
7422       __ j(kZero, type_check_slow_path->GetEntryLabel());
7423 
7424       // Otherwise, compare the classes
7425       if (cls.IsRegister()) {
7426         __ cmpl(temp, cls.AsRegister<Register>());
7427       } else {
7428         DCHECK(cls.IsStackSlot()) << cls;
7429         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7430       }
7431       __ j(kNotEqual, &loop);
7432       break;
7433     }
7434 
7435     case TypeCheckKind::kClassHierarchyCheck: {
7436       // /* HeapReference<Class> */ temp = obj->klass_
7437       GenerateReferenceLoadTwoRegisters(instruction,
7438                                         temp_loc,
7439                                         obj_loc,
7440                                         class_offset,
7441                                         kWithoutReadBarrier);
7442 
7443       // Walk over the class hierarchy to find a match.
7444       NearLabel loop;
7445       __ Bind(&loop);
7446       if (cls.IsRegister()) {
7447         __ cmpl(temp, cls.AsRegister<Register>());
7448       } else {
7449         DCHECK(cls.IsStackSlot()) << cls;
7450         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7451       }
7452       __ j(kEqual, &done);
7453 
7454       // /* HeapReference<Class> */ temp = temp->super_class_
7455       GenerateReferenceLoadOneRegister(instruction,
7456                                        temp_loc,
7457                                        super_offset,
7458                                        maybe_temp2_loc,
7459                                        kWithoutReadBarrier);
7460 
7461       // If the class reference currently in `temp` is not null, jump
7462       // back at the beginning of the loop.
7463       __ testl(temp, temp);
7464       __ j(kNotZero, &loop);
7465       // Otherwise, jump to the slow path to throw the exception.
7466       __ jmp(type_check_slow_path->GetEntryLabel());
7467       break;
7468     }
7469 
7470     case TypeCheckKind::kArrayObjectCheck: {
7471       // /* HeapReference<Class> */ temp = obj->klass_
7472       GenerateReferenceLoadTwoRegisters(instruction,
7473                                         temp_loc,
7474                                         obj_loc,
7475                                         class_offset,
7476                                         kWithoutReadBarrier);
7477 
7478       // Do an exact check.
7479       if (cls.IsRegister()) {
7480         __ cmpl(temp, cls.AsRegister<Register>());
7481       } else {
7482         DCHECK(cls.IsStackSlot()) << cls;
7483         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
7484       }
7485       __ j(kEqual, &done);
7486 
7487       // Otherwise, we need to check that the object's class is a non-primitive array.
7488       // /* HeapReference<Class> */ temp = temp->component_type_
7489       GenerateReferenceLoadOneRegister(instruction,
7490                                        temp_loc,
7491                                        component_offset,
7492                                        maybe_temp2_loc,
7493                                        kWithoutReadBarrier);
7494 
7495       // If the component type is null (i.e. the object is not an array), jump to the slow path to
7496       // throw the exception. Otherwise proceed with the check.
7497       __ testl(temp, temp);
7498       __ j(kZero, type_check_slow_path->GetEntryLabel());
7499 
7500       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
7501       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7502       break;
7503     }
7504 
7505     case TypeCheckKind::kUnresolvedCheck:
7506       // We always go into the type check slow path for the unresolved check case.
7507       // We cannot directly call the CheckCast runtime entry point
7508       // without resorting to a type checking slow path here (i.e. by
7509       // calling InvokeRuntime directly), as it would require to
7510       // assign fixed registers for the inputs of this HInstanceOf
7511       // instruction (following the runtime calling convention), which
7512       // might be cluttered by the potential first read barrier
7513       // emission at the beginning of this method.
7514       __ jmp(type_check_slow_path->GetEntryLabel());
7515       break;
7516 
7517     case TypeCheckKind::kInterfaceCheck: {
7518       // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
7519       // We can not get false positives by doing this.
7520       // /* HeapReference<Class> */ temp = obj->klass_
7521       GenerateReferenceLoadTwoRegisters(instruction,
7522                                         temp_loc,
7523                                         obj_loc,
7524                                         class_offset,
7525                                         kWithoutReadBarrier);
7526 
7527       // /* HeapReference<Class> */ temp = temp->iftable_
7528       GenerateReferenceLoadTwoRegisters(instruction,
7529                                         temp_loc,
7530                                         temp_loc,
7531                                         iftable_offset,
7532                                         kWithoutReadBarrier);
7533       // Iftable is never null.
7534       __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
7535       // Maybe poison the `cls` for direct comparison with memory.
7536       __ MaybePoisonHeapReference(cls.AsRegister<Register>());
7537       // Loop through the iftable and check if any class matches.
7538       NearLabel start_loop;
7539       __ Bind(&start_loop);
7540       // Need to subtract first to handle the empty array case.
7541       __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
7542       __ j(kNegative, type_check_slow_path->GetEntryLabel());
7543       // Go to next interface if the classes do not match.
7544       __ cmpl(cls.AsRegister<Register>(),
7545               CodeGeneratorX86::ArrayAddress(temp,
7546                                              maybe_temp2_loc,
7547                                              TIMES_4,
7548                                              object_array_data_offset));
7549       __ j(kNotEqual, &start_loop);
7550       // If `cls` was poisoned above, unpoison it.
7551       __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
7552       break;
7553     }
7554 
7555     case TypeCheckKind::kBitstringCheck: {
7556       // /* HeapReference<Class> */ temp = obj->klass_
7557       GenerateReferenceLoadTwoRegisters(instruction,
7558                                         temp_loc,
7559                                         obj_loc,
7560                                         class_offset,
7561                                         kWithoutReadBarrier);
7562 
7563       GenerateBitstringTypeCheckCompare(instruction, temp);
7564       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7565       break;
7566     }
7567   }
7568   __ Bind(&done);
7569 
7570   __ Bind(type_check_slow_path->GetExitLabel());
7571 }
7572 
VisitMonitorOperation(HMonitorOperation * instruction)7573 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
7574   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7575       instruction, LocationSummary::kCallOnMainOnly);
7576   InvokeRuntimeCallingConvention calling_convention;
7577   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7578 }
7579 
VisitMonitorOperation(HMonitorOperation * instruction)7580 void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instruction) {
7581   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject
7582                                                  : kQuickUnlockObject,
7583                           instruction,
7584                           instruction->GetDexPc());
7585   if (instruction->IsEnter()) {
7586     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
7587   } else {
7588     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
7589   }
7590 }
7591 
VisitX86AndNot(HX86AndNot * instruction)7592 void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) {
7593   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7594   DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7595   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7596   locations->SetInAt(0, Location::RequiresRegister());
7597   locations->SetInAt(1, Location::RequiresRegister());
7598   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7599 }
7600 
VisitX86AndNot(HX86AndNot * instruction)7601 void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) {
7602   LocationSummary* locations = instruction->GetLocations();
7603   Location first = locations->InAt(0);
7604   Location second = locations->InAt(1);
7605   Location dest = locations->Out();
7606   if (instruction->GetResultType() == DataType::Type::kInt32) {
7607     __ andn(dest.AsRegister<Register>(),
7608             first.AsRegister<Register>(),
7609             second.AsRegister<Register>());
7610   } else {
7611     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7612     __ andn(dest.AsRegisterPairLow<Register>(),
7613             first.AsRegisterPairLow<Register>(),
7614             second.AsRegisterPairLow<Register>());
7615     __ andn(dest.AsRegisterPairHigh<Register>(),
7616             first.AsRegisterPairHigh<Register>(),
7617             second.AsRegisterPairHigh<Register>());
7618   }
7619 }
7620 
VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit * instruction)7621 void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7622   DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7623   DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType();
7624   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7625   locations->SetInAt(0, Location::RequiresRegister());
7626   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7627 }
7628 
VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit * instruction)7629 void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit(
7630     HX86MaskOrResetLeastSetBit* instruction) {
7631   LocationSummary* locations = instruction->GetLocations();
7632   Location src = locations->InAt(0);
7633   Location dest = locations->Out();
7634   DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
7635   switch (instruction->GetOpKind()) {
7636     case HInstruction::kAnd:
7637       __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>());
7638       break;
7639     case HInstruction::kXor:
7640       __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>());
7641       break;
7642     default:
7643       LOG(FATAL) << "Unreachable";
7644   }
7645 }
7646 
VisitAnd(HAnd * instruction)7647 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
VisitOr(HOr * instruction)7648 void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
VisitXor(HXor * instruction)7649 void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
7650 
HandleBitwiseOperation(HBinaryOperation * instruction)7651 void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
7652   LocationSummary* locations =
7653       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7654   DCHECK(instruction->GetResultType() == DataType::Type::kInt32
7655          || instruction->GetResultType() == DataType::Type::kInt64);
7656   locations->SetInAt(0, Location::RequiresRegister());
7657   locations->SetInAt(1, Location::Any());
7658   locations->SetOut(Location::SameAsFirstInput());
7659 }
7660 
VisitAnd(HAnd * instruction)7661 void InstructionCodeGeneratorX86::VisitAnd(HAnd* instruction) {
7662   HandleBitwiseOperation(instruction);
7663 }
7664 
VisitOr(HOr * instruction)7665 void InstructionCodeGeneratorX86::VisitOr(HOr* instruction) {
7666   HandleBitwiseOperation(instruction);
7667 }
7668 
VisitXor(HXor * instruction)7669 void InstructionCodeGeneratorX86::VisitXor(HXor* instruction) {
7670   HandleBitwiseOperation(instruction);
7671 }
7672 
HandleBitwiseOperation(HBinaryOperation * instruction)7673 void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instruction) {
7674   LocationSummary* locations = instruction->GetLocations();
7675   Location first = locations->InAt(0);
7676   Location second = locations->InAt(1);
7677   DCHECK(first.Equals(locations->Out()));
7678 
7679   if (instruction->GetResultType() == DataType::Type::kInt32) {
7680     if (second.IsRegister()) {
7681       if (instruction->IsAnd()) {
7682         __ andl(first.AsRegister<Register>(), second.AsRegister<Register>());
7683       } else if (instruction->IsOr()) {
7684         __ orl(first.AsRegister<Register>(), second.AsRegister<Register>());
7685       } else {
7686         DCHECK(instruction->IsXor());
7687         __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>());
7688       }
7689     } else if (second.IsConstant()) {
7690       if (instruction->IsAnd()) {
7691         __ andl(first.AsRegister<Register>(),
7692                 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7693       } else if (instruction->IsOr()) {
7694         __ orl(first.AsRegister<Register>(),
7695                Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7696       } else {
7697         DCHECK(instruction->IsXor());
7698         __ xorl(first.AsRegister<Register>(),
7699                 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
7700       }
7701     } else {
7702       if (instruction->IsAnd()) {
7703         __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7704       } else if (instruction->IsOr()) {
7705         __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7706       } else {
7707         DCHECK(instruction->IsXor());
7708         __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
7709       }
7710     }
7711   } else {
7712     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7713     if (second.IsRegisterPair()) {
7714       if (instruction->IsAnd()) {
7715         __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7716         __ andl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7717       } else if (instruction->IsOr()) {
7718         __ orl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7719         __ orl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7720       } else {
7721         DCHECK(instruction->IsXor());
7722         __ xorl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
7723         __ xorl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
7724       }
7725     } else if (second.IsDoubleStackSlot()) {
7726       if (instruction->IsAnd()) {
7727         __ andl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7728         __ andl(first.AsRegisterPairHigh<Register>(),
7729                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7730       } else if (instruction->IsOr()) {
7731         __ orl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7732         __ orl(first.AsRegisterPairHigh<Register>(),
7733                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7734       } else {
7735         DCHECK(instruction->IsXor());
7736         __ xorl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
7737         __ xorl(first.AsRegisterPairHigh<Register>(),
7738                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
7739       }
7740     } else {
7741       DCHECK(second.IsConstant()) << second;
7742       int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
7743       int32_t low_value = Low32Bits(value);
7744       int32_t high_value = High32Bits(value);
7745       Immediate low(low_value);
7746       Immediate high(high_value);
7747       Register first_low = first.AsRegisterPairLow<Register>();
7748       Register first_high = first.AsRegisterPairHigh<Register>();
7749       if (instruction->IsAnd()) {
7750         if (low_value == 0) {
7751           __ xorl(first_low, first_low);
7752         } else if (low_value != -1) {
7753           __ andl(first_low, low);
7754         }
7755         if (high_value == 0) {
7756           __ xorl(first_high, first_high);
7757         } else if (high_value != -1) {
7758           __ andl(first_high, high);
7759         }
7760       } else if (instruction->IsOr()) {
7761         if (low_value != 0) {
7762           __ orl(first_low, low);
7763         }
7764         if (high_value != 0) {
7765           __ orl(first_high, high);
7766         }
7767       } else {
7768         DCHECK(instruction->IsXor());
7769         if (low_value != 0) {
7770           __ xorl(first_low, low);
7771         }
7772         if (high_value != 0) {
7773           __ xorl(first_high, high);
7774         }
7775       }
7776     }
7777   }
7778 }
7779 
GenerateReferenceLoadOneRegister(HInstruction * instruction,Location out,uint32_t offset,Location maybe_temp,ReadBarrierOption read_barrier_option)7780 void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
7781     HInstruction* instruction,
7782     Location out,
7783     uint32_t offset,
7784     Location maybe_temp,
7785     ReadBarrierOption read_barrier_option) {
7786   Register out_reg = out.AsRegister<Register>();
7787   if (read_barrier_option == kWithReadBarrier) {
7788     CHECK(kEmitCompilerReadBarrier);
7789     if (kUseBakerReadBarrier) {
7790       // Load with fast path based Baker's read barrier.
7791       // /* HeapReference<Object> */ out = *(out + offset)
7792       codegen_->GenerateFieldLoadWithBakerReadBarrier(
7793           instruction, out, out_reg, offset, /* needs_null_check= */ false);
7794     } else {
7795       // Load with slow path based read barrier.
7796       // Save the value of `out` into `maybe_temp` before overwriting it
7797       // in the following move operation, as we will need it for the
7798       // read barrier below.
7799       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7800       __ movl(maybe_temp.AsRegister<Register>(), out_reg);
7801       // /* HeapReference<Object> */ out = *(out + offset)
7802       __ movl(out_reg, Address(out_reg, offset));
7803       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7804     }
7805   } else {
7806     // Plain load with no read barrier.
7807     // /* HeapReference<Object> */ out = *(out + offset)
7808     __ movl(out_reg, Address(out_reg, offset));
7809     __ MaybeUnpoisonHeapReference(out_reg);
7810   }
7811 }
7812 
GenerateReferenceLoadTwoRegisters(HInstruction * instruction,Location out,Location obj,uint32_t offset,ReadBarrierOption read_barrier_option)7813 void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
7814     HInstruction* instruction,
7815     Location out,
7816     Location obj,
7817     uint32_t offset,
7818     ReadBarrierOption read_barrier_option) {
7819   Register out_reg = out.AsRegister<Register>();
7820   Register obj_reg = obj.AsRegister<Register>();
7821   if (read_barrier_option == kWithReadBarrier) {
7822     CHECK(kEmitCompilerReadBarrier);
7823     if (kUseBakerReadBarrier) {
7824       // Load with fast path based Baker's read barrier.
7825       // /* HeapReference<Object> */ out = *(obj + offset)
7826       codegen_->GenerateFieldLoadWithBakerReadBarrier(
7827           instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7828     } else {
7829       // Load with slow path based read barrier.
7830       // /* HeapReference<Object> */ out = *(obj + offset)
7831       __ movl(out_reg, Address(obj_reg, offset));
7832       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7833     }
7834   } else {
7835     // Plain load with no read barrier.
7836     // /* HeapReference<Object> */ out = *(obj + offset)
7837     __ movl(out_reg, Address(obj_reg, offset));
7838     __ MaybeUnpoisonHeapReference(out_reg);
7839   }
7840 }
7841 
GenerateGcRootFieldLoad(HInstruction * instruction,Location root,const Address & address,Label * fixup_label,ReadBarrierOption read_barrier_option)7842 void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
7843     HInstruction* instruction,
7844     Location root,
7845     const Address& address,
7846     Label* fixup_label,
7847     ReadBarrierOption read_barrier_option) {
7848   Register root_reg = root.AsRegister<Register>();
7849   if (read_barrier_option == kWithReadBarrier) {
7850     DCHECK(kEmitCompilerReadBarrier);
7851     if (kUseBakerReadBarrier) {
7852       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7853       // Baker's read barrier are used:
7854       //
7855       //   root = obj.field;
7856       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7857       //   if (temp != null) {
7858       //     root = temp(root)
7859       //   }
7860 
7861       // /* GcRoot<mirror::Object> */ root = *address
7862       __ movl(root_reg, address);
7863       if (fixup_label != nullptr) {
7864         __ Bind(fixup_label);
7865       }
7866       static_assert(
7867           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7868           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7869           "have different sizes.");
7870       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7871                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
7872                     "have different sizes.");
7873 
7874       // Slow path marking the GC root `root`.
7875       SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
7876           instruction, root, /* unpoison_ref_before_marking= */ false);
7877       codegen_->AddSlowPath(slow_path);
7878 
7879       // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
7880       const int32_t entry_point_offset =
7881           Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
7882       __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
7883       // The entrypoint is null when the GC is not marking.
7884       __ j(kNotEqual, slow_path->GetEntryLabel());
7885       __ Bind(slow_path->GetExitLabel());
7886     } else {
7887       // GC root loaded through a slow path for read barriers other
7888       // than Baker's.
7889       // /* GcRoot<mirror::Object>* */ root = address
7890       __ leal(root_reg, address);
7891       if (fixup_label != nullptr) {
7892         __ Bind(fixup_label);
7893       }
7894       // /* mirror::Object* */ root = root->Read()
7895       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
7896     }
7897   } else {
7898     // Plain GC root load with no read barrier.
7899     // /* GcRoot<mirror::Object> */ root = *address
7900     __ movl(root_reg, address);
7901     if (fixup_label != nullptr) {
7902       __ Bind(fixup_label);
7903     }
7904     // Note that GC roots are not affected by heap poisoning, thus we
7905     // do not have to unpoison `root_reg` here.
7906   }
7907 }
7908 
GenerateFieldLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,uint32_t offset,bool needs_null_check)7909 void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
7910                                                              Location ref,
7911                                                              Register obj,
7912                                                              uint32_t offset,
7913                                                              bool needs_null_check) {
7914   DCHECK(kEmitCompilerReadBarrier);
7915   DCHECK(kUseBakerReadBarrier);
7916 
7917   // /* HeapReference<Object> */ ref = *(obj + offset)
7918   Address src(obj, offset);
7919   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7920 }
7921 
GenerateArrayLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,uint32_t data_offset,Location index,bool needs_null_check)7922 void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
7923                                                              Location ref,
7924                                                              Register obj,
7925                                                              uint32_t data_offset,
7926                                                              Location index,
7927                                                              bool needs_null_check) {
7928   DCHECK(kEmitCompilerReadBarrier);
7929   DCHECK(kUseBakerReadBarrier);
7930 
7931   static_assert(
7932       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
7933       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
7934   // /* HeapReference<Object> */ ref =
7935   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
7936   Address src = CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset);
7937   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7938 }
7939 
GenerateReferenceLoadWithBakerReadBarrier(HInstruction * instruction,Location ref,Register obj,const Address & src,bool needs_null_check,bool always_update_field,Register * temp)7940 void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
7941                                                                  Location ref,
7942                                                                  Register obj,
7943                                                                  const Address& src,
7944                                                                  bool needs_null_check,
7945                                                                  bool always_update_field,
7946                                                                  Register* temp) {
7947   DCHECK(kEmitCompilerReadBarrier);
7948   DCHECK(kUseBakerReadBarrier);
7949 
7950   // In slow path based read barriers, the read barrier call is
7951   // inserted after the original load. However, in fast path based
7952   // Baker's read barriers, we need to perform the load of
7953   // mirror::Object::monitor_ *before* the original reference load.
7954   // This load-load ordering is required by the read barrier.
7955   // The fast path/slow path (for Baker's algorithm) should look like:
7956   //
7957   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
7958   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
7959   //   HeapReference<Object> ref = *src;  // Original reference load.
7960   //   bool is_gray = (rb_state == ReadBarrier::GrayState());
7961   //   if (is_gray) {
7962   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
7963   //   }
7964   //
7965   // Note: the original implementation in ReadBarrier::Barrier is
7966   // slightly more complex as:
7967   // - it implements the load-load fence using a data dependency on
7968   //   the high-bits of rb_state, which are expected to be all zeroes
7969   //   (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
7970   //   which is a no-op thanks to the x86 memory model);
7971   // - it performs additional checks that we do not do here for
7972   //   performance reasons.
7973 
7974   Register ref_reg = ref.AsRegister<Register>();
7975   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
7976 
7977   // Given the numeric representation, it's enough to check the low bit of the rb_state.
7978   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
7979   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
7980   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
7981   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
7982   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
7983 
7984   // if (rb_state == ReadBarrier::GrayState())
7985   //   ref = ReadBarrier::Mark(ref);
7986   // At this point, just do the "if" and make sure that flags are preserved until the branch.
7987   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
7988   if (needs_null_check) {
7989     MaybeRecordImplicitNullCheck(instruction);
7990   }
7991 
7992   // Load fence to prevent load-load reordering.
7993   // Note that this is a no-op, thanks to the x86 memory model.
7994   GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
7995 
7996   // The actual reference load.
7997   // /* HeapReference<Object> */ ref = *src
7998   __ movl(ref_reg, src);  // Flags are unaffected.
7999 
8000   // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
8001   // Slow path marking the object `ref` when it is gray.
8002   SlowPathCode* slow_path;
8003   if (always_update_field) {
8004     DCHECK(temp != nullptr);
8005     slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
8006         instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp);
8007   } else {
8008     slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
8009         instruction, ref, /* unpoison_ref_before_marking= */ true);
8010   }
8011   AddSlowPath(slow_path);
8012 
8013   // We have done the "if" of the gray bit check above, now branch based on the flags.
8014   __ j(kNotZero, slow_path->GetEntryLabel());
8015 
8016   // Object* ref = ref_addr->AsMirrorPtr()
8017   __ MaybeUnpoisonHeapReference(ref_reg);
8018 
8019   __ Bind(slow_path->GetExitLabel());
8020 }
8021 
GenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)8022 void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
8023                                                Location out,
8024                                                Location ref,
8025                                                Location obj,
8026                                                uint32_t offset,
8027                                                Location index) {
8028   DCHECK(kEmitCompilerReadBarrier);
8029 
8030   // Insert a slow path based read barrier *after* the reference load.
8031   //
8032   // If heap poisoning is enabled, the unpoisoning of the loaded
8033   // reference will be carried out by the runtime within the slow
8034   // path.
8035   //
8036   // Note that `ref` currently does not get unpoisoned (when heap
8037   // poisoning is enabled), which is alright as the `ref` argument is
8038   // not used by the artReadBarrierSlow entry point.
8039   //
8040   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
8041   SlowPathCode* slow_path = new (GetScopedAllocator())
8042       ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
8043   AddSlowPath(slow_path);
8044 
8045   __ jmp(slow_path->GetEntryLabel());
8046   __ Bind(slow_path->GetExitLabel());
8047 }
8048 
MaybeGenerateReadBarrierSlow(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)8049 void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
8050                                                     Location out,
8051                                                     Location ref,
8052                                                     Location obj,
8053                                                     uint32_t offset,
8054                                                     Location index) {
8055   if (kEmitCompilerReadBarrier) {
8056     // Baker's read barriers shall be handled by the fast path
8057     // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
8058     DCHECK(!kUseBakerReadBarrier);
8059     // If heap poisoning is enabled, unpoisoning will be taken care of
8060     // by the runtime within the slow path.
8061     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
8062   } else if (kPoisonHeapReferences) {
8063     __ UnpoisonHeapReference(out.AsRegister<Register>());
8064   }
8065 }
8066 
GenerateReadBarrierForRootSlow(HInstruction * instruction,Location out,Location root)8067 void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
8068                                                       Location out,
8069                                                       Location root) {
8070   DCHECK(kEmitCompilerReadBarrier);
8071 
8072   // Insert a slow path based read barrier *after* the GC root load.
8073   //
8074   // Note that GC roots are not affected by heap poisoning, so we do
8075   // not need to do anything special for this here.
8076   SlowPathCode* slow_path =
8077       new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86(instruction, out, root);
8078   AddSlowPath(slow_path);
8079 
8080   __ jmp(slow_path->GetEntryLabel());
8081   __ Bind(slow_path->GetExitLabel());
8082 }
8083 
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)8084 void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
8085   // Nothing to do, this should be removed during prepare for register allocator.
8086   LOG(FATAL) << "Unreachable";
8087 }
8088 
VisitBoundType(HBoundType * instruction ATTRIBUTE_UNUSED)8089 void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
8090   // Nothing to do, this should be removed during prepare for register allocator.
8091   LOG(FATAL) << "Unreachable";
8092 }
8093 
8094 // Simple implementation of packed switch - generate cascaded compare/jumps.
VisitPackedSwitch(HPackedSwitch * switch_instr)8095 void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8096   LocationSummary* locations =
8097       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8098   locations->SetInAt(0, Location::RequiresRegister());
8099 }
8100 
// Emits a packed switch as a cascade of compare/jump pairs.
//
// `value_reg` holds the switch value, `lower_bound` is the value of the first
// case and `num_entries` the number of consecutive cases. Case `i` branches to
// `switch_block->GetSuccessors()[i]`; any value that matches no case ends up at
// `default_block`. Each `cmpl` in the main loop serves two cases: one through
// a less-than/below jump and one through an equality jump.
void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
                                                              int32_t lower_bound,
                                                              uint32_t num_entries,
                                                              HBasicBlock* switch_block,
                                                              HBasicBlock* default_block) {
  // Figure out the correct compare values and jump conditions.
  // Handle the first compare/branch as a special case because it might
  // jump to the default case.
  DCHECK_GT(num_entries, 2u);
  Condition first_condition;
  uint32_t index;
  const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
  if (lower_bound != 0) {
    // Non-zero start value: use a signed comparison. Values below the first
    // case go straight to the default block, and the first case itself is
    // dispatched here, so the loop below starts at the second case.
    first_condition = kLess;
    __ cmpl(value_reg, Immediate(lower_bound));
    __ j(first_condition, codegen_->GetLabelOf(default_block));
    __ j(kEqual, codegen_->GetLabelOf(successors[0]));

    index = 1;
  } else {
    // Handle all the compare/jumps below.
    // With a zero start value the unsigned "below" condition is used, so a
    // negative value never satisfies it and falls through to the default.
    first_condition = kBelow;
    index = 0;
  }

  // Handle the rest of the compare/jumps, two cases per compare.
  for (; index + 1 < num_entries; index += 2) {
    int32_t compare_to_value = lower_bound + index + 1;
    __ cmpl(value_reg, Immediate(compare_to_value));
    // Jump to successors[index] if value < case_value[index + 1]. All smaller
    // values were already dispatched above, so this means
    // value == case_value[index].
    __ j(first_condition, codegen_->GetLabelOf(successors[index]));
    // Jump to successors[index + 1] if value == case_value[index + 1].
    __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
  }

  if (index != num_entries) {
    // One case is left over after pairing. Handle the last one.
    DCHECK_EQ(index + 1, num_entries);
    __ cmpl(value_reg, Immediate(lower_bound + index));
    __ j(kEqual, codegen_->GetLabelOf(successors[index]));
  }

  // And the default for any other value. The explicit jump is skipped when
  // GoesToNextBlock() reports that the default block directly follows.
  if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
    __ jmp(codegen_->GetLabelOf(default_block));
  }
}
8148 
VisitPackedSwitch(HPackedSwitch * switch_instr)8149 void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8150   int32_t lower_bound = switch_instr->GetStartValue();
8151   uint32_t num_entries = switch_instr->GetNumEntries();
8152   LocationSummary* locations = switch_instr->GetLocations();
8153   Register value_reg = locations->InAt(0).AsRegister<Register>();
8154 
8155   GenPackedSwitchWithCompares(value_reg,
8156                               lower_bound,
8157                               num_entries,
8158                               switch_instr->GetBlock(),
8159                               switch_instr->GetDefaultBlock());
8160 }
8161 
VisitX86PackedSwitch(HX86PackedSwitch * switch_instr)8162 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8163   LocationSummary* locations =
8164       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8165   locations->SetInAt(0, Location::RequiresRegister());
8166 
8167   // Constant area pointer.
8168   locations->SetInAt(1, Location::RequiresRegister());
8169 
8170   // And the temporary we need.
8171   locations->AddTemp(Location::RequiresRegister());
8172 }
8173 
VisitX86PackedSwitch(HX86PackedSwitch * switch_instr)8174 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
8175   int32_t lower_bound = switch_instr->GetStartValue();
8176   uint32_t num_entries = switch_instr->GetNumEntries();
8177   LocationSummary* locations = switch_instr->GetLocations();
8178   Register value_reg = locations->InAt(0).AsRegister<Register>();
8179   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
8180 
8181   if (num_entries <= kPackedSwitchJumpTableThreshold) {
8182     GenPackedSwitchWithCompares(value_reg,
8183                                 lower_bound,
8184                                 num_entries,
8185                                 switch_instr->GetBlock(),
8186                                 default_block);
8187     return;
8188   }
8189 
8190   // Optimizing has a jump area.
8191   Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
8192   Register constant_area = locations->InAt(1).AsRegister<Register>();
8193 
8194   // Remove the bias, if needed.
8195   if (lower_bound != 0) {
8196     __ leal(temp_reg, Address(value_reg, -lower_bound));
8197     value_reg = temp_reg;
8198   }
8199 
8200   // Is the value in range?
8201   DCHECK_GE(num_entries, 1u);
8202   __ cmpl(value_reg, Immediate(num_entries - 1));
8203   __ j(kAbove, codegen_->GetLabelOf(default_block));
8204 
8205   // We are in the range of the table.
8206   // Load (target-constant_area) from the jump table, indexing by the value.
8207   __ movl(temp_reg, codegen_->LiteralCaseTable(switch_instr, constant_area, value_reg));
8208 
8209   // Compute the actual target address by adding in constant_area.
8210   __ addl(temp_reg, constant_area);
8211 
8212   // And jump.
8213   __ jmp(temp_reg);
8214 }
8215 
VisitX86ComputeBaseMethodAddress(HX86ComputeBaseMethodAddress * insn)8216 void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress(
8217     HX86ComputeBaseMethodAddress* insn) {
8218   LocationSummary* locations =
8219       new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
8220   locations->SetOut(Location::RequiresRegister());
8221 }
8222 
VisitX86ComputeBaseMethodAddress(HX86ComputeBaseMethodAddress * insn)8223 void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress(
8224     HX86ComputeBaseMethodAddress* insn) {
8225   LocationSummary* locations = insn->GetLocations();
8226   Register reg = locations->Out().AsRegister<Register>();
8227 
8228   // Generate call to next instruction.
8229   Label next_instruction;
8230   __ call(&next_instruction);
8231   __ Bind(&next_instruction);
8232 
8233   // Remember this offset for later use with constant area.
8234   codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize());
8235 
8236   // Grab the return address off the stack.
8237   __ popl(reg);
8238 }
8239 
VisitX86LoadFromConstantTable(HX86LoadFromConstantTable * insn)8240 void LocationsBuilderX86::VisitX86LoadFromConstantTable(
8241     HX86LoadFromConstantTable* insn) {
8242   LocationSummary* locations =
8243       new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall);
8244 
8245   locations->SetInAt(0, Location::RequiresRegister());
8246   locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant()));
8247 
8248   // If we don't need to be materialized, we only need the inputs to be set.
8249   if (insn->IsEmittedAtUseSite()) {
8250     return;
8251   }
8252 
8253   switch (insn->GetType()) {
8254     case DataType::Type::kFloat32:
8255     case DataType::Type::kFloat64:
8256       locations->SetOut(Location::RequiresFpuRegister());
8257       break;
8258 
8259     case DataType::Type::kInt32:
8260       locations->SetOut(Location::RequiresRegister());
8261       break;
8262 
8263     default:
8264       LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
8265   }
8266 }
8267 
VisitX86LoadFromConstantTable(HX86LoadFromConstantTable * insn)8268 void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) {
8269   if (insn->IsEmittedAtUseSite()) {
8270     return;
8271   }
8272 
8273   LocationSummary* locations = insn->GetLocations();
8274   Location out = locations->Out();
8275   Register const_area = locations->InAt(0).AsRegister<Register>();
8276   HConstant *value = insn->GetConstant();
8277 
8278   switch (insn->GetType()) {
8279     case DataType::Type::kFloat32:
8280       __ movss(out.AsFpuRegister<XmmRegister>(),
8281                codegen_->LiteralFloatAddress(
8282                   value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
8283       break;
8284 
8285     case DataType::Type::kFloat64:
8286       __ movsd(out.AsFpuRegister<XmmRegister>(),
8287                codegen_->LiteralDoubleAddress(
8288                   value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
8289       break;
8290 
8291     case DataType::Type::kInt32:
8292       __ movl(out.AsRegister<Register>(),
8293               codegen_->LiteralInt32Address(
8294                   value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
8295       break;
8296 
8297     default:
8298       LOG(FATAL) << "Unsupported x86 constant area type " << insn->GetType();
8299   }
8300 }
8301 
8302 /**
8303  * Class to handle late fixup of offsets into constant area.
8304  */
8305 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
8306  public:
RIPFixup(CodeGeneratorX86 & codegen,HX86ComputeBaseMethodAddress * base_method_address,size_t offset)8307   RIPFixup(CodeGeneratorX86& codegen,
8308            HX86ComputeBaseMethodAddress* base_method_address,
8309            size_t offset)
8310       : codegen_(&codegen),
8311         base_method_address_(base_method_address),
8312         offset_into_constant_area_(offset) {}
8313 
8314  protected:
SetOffset(size_t offset)8315   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
8316 
8317   CodeGeneratorX86* codegen_;
8318   HX86ComputeBaseMethodAddress* base_method_address_;
8319 
8320  private:
Process(const MemoryRegion & region,int pos)8321   void Process(const MemoryRegion& region, int pos) override {
8322     // Patch the correct offset for the instruction.  The place to patch is the
8323     // last 4 bytes of the instruction.
8324     // The value to patch is the distance from the offset in the constant area
8325     // from the address computed by the HX86ComputeBaseMethodAddress instruction.
8326     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
8327     int32_t relative_position =
8328         constant_offset - codegen_->GetMethodAddressOffset(base_method_address_);
8329 
8330     // Patch in the right value.
8331     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
8332   }
8333 
8334   // Location in constant area that the fixup refers to.
8335   int32_t offset_into_constant_area_;
8336 };
8337 
8338 /**
8339  * Class to handle late fixup of offsets to a jump table that will be created in the
8340  * constant area.
8341  */
8342 class JumpTableRIPFixup : public RIPFixup {
8343  public:
JumpTableRIPFixup(CodeGeneratorX86 & codegen,HX86PackedSwitch * switch_instr)8344   JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
8345       : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)),
8346         switch_instr_(switch_instr) {}
8347 
CreateJumpTable()8348   void CreateJumpTable() {
8349     X86Assembler* assembler = codegen_->GetAssembler();
8350 
8351     // Ensure that the reference to the jump table has the correct offset.
8352     const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
8353     SetOffset(offset_in_constant_table);
8354 
8355     // The label values in the jump table are computed relative to the
8356     // instruction addressing the constant area.
8357     const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_);
8358 
8359     // Populate the jump table with the correct values for the jump table.
8360     int32_t num_entries = switch_instr_->GetNumEntries();
8361     HBasicBlock* block = switch_instr_->GetBlock();
8362     const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
8363     // The value that we want is the target offset - the position of the table.
8364     for (int32_t i = 0; i < num_entries; i++) {
8365       HBasicBlock* b = successors[i];
8366       Label* l = codegen_->GetLabelOf(b);
8367       DCHECK(l->IsBound());
8368       int32_t offset_to_block = l->Position() - relative_offset;
8369       assembler->AppendInt32(offset_to_block);
8370     }
8371   }
8372 
8373  private:
8374   const HX86PackedSwitch* switch_instr_;
8375 };
8376 
Finalize(CodeAllocator * allocator)8377 void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
8378   // Generate the constant area if needed.
8379   X86Assembler* assembler = GetAssembler();
8380 
8381   if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
8382     // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
8383     // byte values.
8384     assembler->Align(4, 0);
8385     constant_area_start_ = assembler->CodeSize();
8386 
8387     // Populate any jump tables.
8388     for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
8389       jump_table->CreateJumpTable();
8390     }
8391 
8392     // And now add the constant area to the generated code.
8393     assembler->AddConstantArea();
8394   }
8395 
8396   // And finish up.
8397   CodeGenerator::Finalize(allocator);
8398 }
8399 
LiteralDoubleAddress(double v,HX86ComputeBaseMethodAddress * method_base,Register reg)8400 Address CodeGeneratorX86::LiteralDoubleAddress(double v,
8401                                                HX86ComputeBaseMethodAddress* method_base,
8402                                                Register reg) {
8403   AssemblerFixup* fixup =
8404       new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddDouble(v));
8405   return Address(reg, kDummy32BitOffset, fixup);
8406 }
8407 
LiteralFloatAddress(float v,HX86ComputeBaseMethodAddress * method_base,Register reg)8408 Address CodeGeneratorX86::LiteralFloatAddress(float v,
8409                                               HX86ComputeBaseMethodAddress* method_base,
8410                                               Register reg) {
8411   AssemblerFixup* fixup =
8412       new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddFloat(v));
8413   return Address(reg, kDummy32BitOffset, fixup);
8414 }
8415 
LiteralInt32Address(int32_t v,HX86ComputeBaseMethodAddress * method_base,Register reg)8416 Address CodeGeneratorX86::LiteralInt32Address(int32_t v,
8417                                               HX86ComputeBaseMethodAddress* method_base,
8418                                               Register reg) {
8419   AssemblerFixup* fixup =
8420       new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt32(v));
8421   return Address(reg, kDummy32BitOffset, fixup);
8422 }
8423 
LiteralInt64Address(int64_t v,HX86ComputeBaseMethodAddress * method_base,Register reg)8424 Address CodeGeneratorX86::LiteralInt64Address(int64_t v,
8425                                               HX86ComputeBaseMethodAddress* method_base,
8426                                               Register reg) {
8427   AssemblerFixup* fixup =
8428       new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt64(v));
8429   return Address(reg, kDummy32BitOffset, fixup);
8430 }
8431 
Load32BitValue(Register dest,int32_t value)8432 void CodeGeneratorX86::Load32BitValue(Register dest, int32_t value) {
8433   if (value == 0) {
8434     __ xorl(dest, dest);
8435   } else {
8436     __ movl(dest, Immediate(value));
8437   }
8438 }
8439 
Compare32BitValue(Register dest,int32_t value)8440 void CodeGeneratorX86::Compare32BitValue(Register dest, int32_t value) {
8441   if (value == 0) {
8442     __ testl(dest, dest);
8443   } else {
8444     __ cmpl(dest, Immediate(value));
8445   }
8446 }
8447 
GenerateIntCompare(Location lhs,Location rhs)8448 void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
8449   Register lhs_reg = lhs.AsRegister<Register>();
8450   GenerateIntCompare(lhs_reg, rhs);
8451 }
8452 
GenerateIntCompare(Register lhs,Location rhs)8453 void CodeGeneratorX86::GenerateIntCompare(Register lhs, Location rhs) {
8454   if (rhs.IsConstant()) {
8455     int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
8456     Compare32BitValue(lhs, value);
8457   } else if (rhs.IsStackSlot()) {
8458     __ cmpl(lhs, Address(ESP, rhs.GetStackIndex()));
8459   } else {
8460     __ cmpl(lhs, rhs.AsRegister<Register>());
8461   }
8462 }
8463 
ArrayAddress(Register obj,Location index,ScaleFactor scale,uint32_t data_offset)8464 Address CodeGeneratorX86::ArrayAddress(Register obj,
8465                                        Location index,
8466                                        ScaleFactor scale,
8467                                        uint32_t data_offset) {
8468   return index.IsConstant() ?
8469       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
8470       Address(obj, index.AsRegister<Register>(), scale, data_offset);
8471 }
8472 
LiteralCaseTable(HX86PackedSwitch * switch_instr,Register reg,Register value)8473 Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
8474                                            Register reg,
8475                                            Register value) {
8476   // Create a fixup to be used to create and address the jump table.
8477   JumpTableRIPFixup* table_fixup =
8478       new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
8479 
8480   // We have to populate the jump tables.
8481   fixups_to_jump_tables_.push_back(table_fixup);
8482 
8483   // We want a scaled address, as we are extracting the correct offset from the table.
8484   return Address(reg, value, TIMES_4, kDummy32BitOffset, table_fixup);
8485 }
8486 
8487 // TODO: target as memory.
MoveFromReturnRegister(Location target,DataType::Type type)8488 void CodeGeneratorX86::MoveFromReturnRegister(Location target, DataType::Type type) {
8489   if (!target.IsValid()) {
8490     DCHECK_EQ(type, DataType::Type::kVoid);
8491     return;
8492   }
8493 
8494   DCHECK_NE(type, DataType::Type::kVoid);
8495 
8496   Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type);
8497   if (target.Equals(return_loc)) {
8498     return;
8499   }
8500 
8501   // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
8502   //       with the else branch.
8503   if (type == DataType::Type::kInt64) {
8504     HParallelMove parallel_move(GetGraph()->GetAllocator());
8505     parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), DataType::Type::kInt32, nullptr);
8506     parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), DataType::Type::kInt32, nullptr);
8507     GetMoveResolver()->EmitNativeCode(&parallel_move);
8508   } else {
8509     // Let the parallel move resolver take care of all of this.
8510     HParallelMove parallel_move(GetGraph()->GetAllocator());
8511     parallel_move.AddMove(return_loc, target, type, nullptr);
8512     GetMoveResolver()->EmitNativeCode(&parallel_move);
8513   }
8514 }
8515 
PatchJitRootUse(uint8_t * code,const uint8_t * roots_data,const PatchInfo<Label> & info,uint64_t index_in_table) const8516 void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
8517                                        const uint8_t* roots_data,
8518                                        const PatchInfo<Label>& info,
8519                                        uint64_t index_in_table) const {
8520   uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
8521   uintptr_t address =
8522       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
8523   using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
8524   reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
8525      dchecked_integral_cast<uint32_t>(address);
8526 }
8527 
EmitJitRootPatches(uint8_t * code,const uint8_t * roots_data)8528 void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
8529   for (const PatchInfo<Label>& info : jit_string_patches_) {
8530     StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
8531     uint64_t index_in_table = GetJitStringRootIndex(string_reference);
8532     PatchJitRootUse(code, roots_data, info, index_in_table);
8533   }
8534 
8535   for (const PatchInfo<Label>& info : jit_class_patches_) {
8536     TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
8537     uint64_t index_in_table = GetJitClassRootIndex(type_reference);
8538     PatchJitRootUse(code, roots_data, info, index_in_table);
8539   }
8540 }
8541 
VisitIntermediateAddress(HIntermediateAddress * instruction ATTRIBUTE_UNUSED)8542 void LocationsBuilderX86::VisitIntermediateAddress(HIntermediateAddress* instruction
8543                                                    ATTRIBUTE_UNUSED) {
8544   LOG(FATAL) << "Unreachable";
8545 }
8546 
VisitIntermediateAddress(HIntermediateAddress * instruction ATTRIBUTE_UNUSED)8547 void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* instruction
8548                                                            ATTRIBUTE_UNUSED) {
8549   LOG(FATAL) << "Unreachable";
8550 }
8551 
CpuHasAvxFeatureFlag()8552 bool LocationsBuilderX86::CpuHasAvxFeatureFlag() {
8553   return codegen_->GetInstructionSetFeatures().HasAVX();
8554 }
CpuHasAvx2FeatureFlag()8555 bool LocationsBuilderX86::CpuHasAvx2FeatureFlag() {
8556   return codegen_->GetInstructionSetFeatures().HasAVX2();
8557 }
CpuHasAvxFeatureFlag()8558 bool InstructionCodeGeneratorX86::CpuHasAvxFeatureFlag() {
8559   return codegen_->GetInstructionSetFeatures().HasAVX();
8560 }
CpuHasAvx2FeatureFlag()8561 bool InstructionCodeGeneratorX86::CpuHasAvx2FeatureFlag() {
8562   return codegen_->GetInstructionSetFeatures().HasAVX2();
8563 }
8564 
8565 #undef __
8566 
8567 }  // namespace x86
8568 }  // namespace art
8569