1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_x86_64.h"
18 
19 #include "art_method.h"
20 #include "code_generator_utils.h"
21 #include "compiled_method.h"
22 #include "entrypoints/quick/quick_entrypoints.h"
23 #include "gc/accounting/card_table.h"
24 #include "intrinsics.h"
25 #include "intrinsics_x86_64.h"
26 #include "mirror/array-inl.h"
27 #include "mirror/class-inl.h"
28 #include "mirror/object_reference.h"
29 #include "thread.h"
30 #include "utils/assembler.h"
31 #include "utils/stack_checks.h"
32 #include "utils/x86_64/assembler_x86_64.h"
33 #include "utils/x86_64/managed_register_x86_64.h"
34 
35 namespace art {
36 
37 template<class MirrorType>
38 class GcRoot;
39 
40 namespace x86_64 {
41 
42 static constexpr int kCurrentMethodStackOffset = 0;
43 static constexpr Register kMethodRegisterArgument = RDI;
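// The caller passes the current ArtMethod* in RDI, the first argument register of the
// calling convention used here.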
44 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
45  * table version generates 7 instructions and num_entries literals. The compare/jump sequence
46  * generates less code/data when num_entries is small.
47 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
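// Illustrative arithmetic for the estimate above: at num_entries = 4 the compare/jump
// sequence costs about 6 instructions versus 7 instructions plus 4 literals for a jump
// table; at num_entries = 8 it costs about 12 instructions, so the jump table starts to
// pay off around this threshold.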
48 
49 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
50 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
51 
52 static constexpr int kC2ConditionMask = 0x400;
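// 0x400 selects the C2 condition flag (bit 10) of the x87 FPU status word; it is
// presumably tested by the FPREM-based remainder loop to decide whether another
// partial-remainder iteration is needed.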
53 
54 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
55 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
56 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
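// QUICK_ENTRY_POINT(x) yields the byte offset of the Quick entrypoint `x` within the
// Thread object, for use with the gs-relative (Thread-relative) addressing used below.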
57 
58 class NullCheckSlowPathX86_64 : public SlowPathCode {
59  public:
60   explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
61 
62   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
63     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
64     __ Bind(GetEntryLabel());
65     if (instruction_->CanThrowIntoCatchBlock()) {
66       // Live registers will be restored in the catch block if caught.
67       SaveLiveRegisters(codegen, instruction_->GetLocations());
68     }
69     x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
70                                   instruction_,
71                                   instruction_->GetDexPc(),
72                                   this);
73     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
74   }
75 
76   bool IsFatal() const OVERRIDE { return true; }
77 
78   const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; }
79 
80  private:
81   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
82 };
83 
84 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
85  public:
86   explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
87 
88   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
89     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
90     __ Bind(GetEntryLabel());
91     x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
92     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
93   }
94 
95   bool IsFatal() const OVERRIDE { return true; }
96 
97   const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; }
98 
99  private:
100   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
101 };
102 
103 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
104  public:
105   DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, Primitive::Type type, bool is_div)
106       : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
107 
108   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
109     __ Bind(GetEntryLabel());
110     if (type_ == Primitive::kPrimInt) {
111       if (is_div_) {
112         __ negl(cpu_reg_);
113       } else {
114         __ xorl(cpu_reg_, cpu_reg_);
115       }
116 
117     } else {
118       DCHECK_EQ(Primitive::kPrimLong, type_);
119       if (is_div_) {
120         __ negq(cpu_reg_);
121       } else {
122         __ xorl(cpu_reg_, cpu_reg_);
123       }
124     }
125     __ jmp(GetExitLabel());
126   }
127 
128   const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; }
129 
130  private:
131   const CpuRegister cpu_reg_;
132   const Primitive::Type type_;
133   const bool is_div_;
134   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
135 };
136 
137 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
138  public:
139   SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
140       : SlowPathCode(instruction), successor_(successor) {}
141 
142   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
143     LocationSummary* locations = instruction_->GetLocations();
144     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
145     __ Bind(GetEntryLabel());
146     SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
147     x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
148     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
149     RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
150     if (successor_ == nullptr) {
151       __ jmp(GetReturnLabel());
152     } else {
153       __ jmp(x86_64_codegen->GetLabelOf(successor_));
154     }
155   }
156 
157   Label* GetReturnLabel() {
158     DCHECK(successor_ == nullptr);
159     return &return_label_;
160   }
161 
162   HBasicBlock* GetSuccessor() const {
163     return successor_;
164   }
165 
166   const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; }
167 
168  private:
169   HBasicBlock* const successor_;
170   Label return_label_;
171 
172   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
173 };
174 
175 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
176  public:
177   explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
178     : SlowPathCode(instruction) {}
179 
180   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
181     LocationSummary* locations = instruction_->GetLocations();
182     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
183     __ Bind(GetEntryLabel());
184     if (instruction_->CanThrowIntoCatchBlock()) {
185       // Live registers will be restored in the catch block if caught.
186       SaveLiveRegisters(codegen, instruction_->GetLocations());
187     }
188     // Are we using an array length from memory?
189     HInstruction* array_length = instruction_->InputAt(1);
190     Location length_loc = locations->InAt(1);
191     InvokeRuntimeCallingConvention calling_convention;
192     if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
193       // Load the array length into our temporary.
194       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
195       Location array_loc = array_length->GetLocations()->InAt(0);
196       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
197       length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
198       // Check for conflicts with index.
199       if (length_loc.Equals(locations->InAt(0))) {
200         // We know we aren't using parameter 2.
201         length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
202       }
203       __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
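      // With string compression, the count field packs the character count together with a
      // compression flag in its least significant bit, so the actual length is recovered by
      // shifting right by one.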
204       if (mirror::kUseStringCompression) {
205         __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
206       }
207     }
208 
209     // We're moving two locations to locations that could overlap, so we need a parallel
210     // move resolver.
211     codegen->EmitParallelMoves(
212         locations->InAt(0),
213         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
214         Primitive::kPrimInt,
215         length_loc,
216         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
217         Primitive::kPrimInt);
218     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
219         ? kQuickThrowStringBounds
220         : kQuickThrowArrayBounds;
221     x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
222     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
223     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
224   }
225 
226   bool IsFatal() const OVERRIDE { return true; }
227 
228   const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; }
229 
230  private:
231   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
232 };
233 
234 class LoadClassSlowPathX86_64 : public SlowPathCode {
235  public:
236   LoadClassSlowPathX86_64(HLoadClass* cls,
237                           HInstruction* at,
238                           uint32_t dex_pc,
239                           bool do_clinit)
240       : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
241     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
242   }
243 
244   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
245     LocationSummary* locations = instruction_->GetLocations();
246     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
247     __ Bind(GetEntryLabel());
248 
249     SaveLiveRegisters(codegen, locations);
250 
251     // Custom calling convention: RAX serves as both input and output.
252     __ movl(CpuRegister(RAX), Immediate(cls_->GetTypeIndex().index_));
253     x86_64_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType,
254                                   instruction_,
255                                   dex_pc_,
256                                   this);
257     if (do_clinit_) {
258       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
259     } else {
260       CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
261     }
262 
263     Location out = locations->Out();
264     // Move the class to the desired location.
265     if (out.IsValid()) {
266       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
267       x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
268     }
269 
270     RestoreLiveRegisters(codegen, locations);
271     // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
272     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
273     if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
274       DCHECK(out.IsValid());
275       __ movl(Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false),
276               locations->Out().AsRegister<CpuRegister>());
277       Label* fixup_label = x86_64_codegen->NewTypeBssEntryPatch(cls_);
278       __ Bind(fixup_label);
279     }
280     __ jmp(GetExitLabel());
281   }
282 
283   const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; }
284 
285  private:
286   // The class this slow path will load.
287   HLoadClass* const cls_;
288 
289   // The dex PC of the `at` instruction passed to the constructor.
290   const uint32_t dex_pc_;
291 
292   // Whether to initialize the class.
293   const bool do_clinit_;
294 
295   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
296 };
297 
298 class LoadStringSlowPathX86_64 : public SlowPathCode {
299  public:
300   explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
301 
302   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
303     LocationSummary* locations = instruction_->GetLocations();
304     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
305 
306     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
307     __ Bind(GetEntryLabel());
308     SaveLiveRegisters(codegen, locations);
309 
310     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
311     // Custom calling convention: RAX serves as both input and output.
312     __ movl(CpuRegister(RAX), Immediate(string_index.index_));
313     x86_64_codegen->InvokeRuntime(kQuickResolveString,
314                                   instruction_,
315                                   instruction_->GetDexPc(),
316                                   this);
317     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
318     x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
319     RestoreLiveRegisters(codegen, locations);
320 
321     // Store the resolved String to the BSS entry.
322     __ movl(Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false),
323             locations->Out().AsRegister<CpuRegister>());
324     Label* fixup_label = x86_64_codegen->NewStringBssEntryPatch(instruction_->AsLoadString());
325     __ Bind(fixup_label);
326 
327     __ jmp(GetExitLabel());
328   }
329 
330   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }
331 
332  private:
333   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
334 };
335 
336 class TypeCheckSlowPathX86_64 : public SlowPathCode {
337  public:
338   TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
339       : SlowPathCode(instruction), is_fatal_(is_fatal) {}
340 
341   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
342     LocationSummary* locations = instruction_->GetLocations();
343     uint32_t dex_pc = instruction_->GetDexPc();
344     DCHECK(instruction_->IsCheckCast()
345            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
346 
347     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
348     __ Bind(GetEntryLabel());
349 
350     if (!is_fatal_) {
351       SaveLiveRegisters(codegen, locations);
352     }
353 
354     // We're moving two locations to locations that could overlap, so we need a parallel
355     // move resolver.
356     InvokeRuntimeCallingConvention calling_convention;
357     codegen->EmitParallelMoves(locations->InAt(0),
358                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
359                                Primitive::kPrimNot,
360                                locations->InAt(1),
361                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
362                                Primitive::kPrimNot);
363     if (instruction_->IsInstanceOf()) {
364       x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
365       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
366     } else {
367       DCHECK(instruction_->IsCheckCast());
368       x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
369       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
370     }
371 
372     if (!is_fatal_) {
373       if (instruction_->IsInstanceOf()) {
374         x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
375       }
376 
377       RestoreLiveRegisters(codegen, locations);
378       __ jmp(GetExitLabel());
379     }
380   }
381 
382   const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; }
383 
384   bool IsFatal() const OVERRIDE { return is_fatal_; }
385 
386  private:
387   const bool is_fatal_;
388 
389   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
390 };
391 
392 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
393  public:
394   explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
395       : SlowPathCode(instruction) {}
396 
397   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
398     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
399     __ Bind(GetEntryLabel());
400     LocationSummary* locations = instruction_->GetLocations();
401     SaveLiveRegisters(codegen, locations);
402     InvokeRuntimeCallingConvention calling_convention;
403     x86_64_codegen->Load32BitValue(
404         CpuRegister(calling_convention.GetRegisterAt(0)),
405         static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
406     x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
407     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
408   }
409 
410   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
411 
412  private:
413   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
414 };
415 
416 class ArraySetSlowPathX86_64 : public SlowPathCode {
417  public:
418   explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
419 
420   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
421     LocationSummary* locations = instruction_->GetLocations();
422     __ Bind(GetEntryLabel());
423     SaveLiveRegisters(codegen, locations);
424 
425     InvokeRuntimeCallingConvention calling_convention;
426     HParallelMove parallel_move(codegen->GetGraph()->GetArena());
427     parallel_move.AddMove(
428         locations->InAt(0),
429         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
430         Primitive::kPrimNot,
431         nullptr);
432     parallel_move.AddMove(
433         locations->InAt(1),
434         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
435         Primitive::kPrimInt,
436         nullptr);
437     parallel_move.AddMove(
438         locations->InAt(2),
439         Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
440         Primitive::kPrimNot,
441         nullptr);
442     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
443 
444     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
445     x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
446     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
447     RestoreLiveRegisters(codegen, locations);
448     __ jmp(GetExitLabel());
449   }
450 
451   const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; }
452 
453  private:
454   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
455 };
456 
457 // Slow path marking an object reference `ref` during a read
458 // barrier. The field `obj.field` in the object `obj` holding this
459 // reference does not get updated by this slow path after marking (see
460 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
461 //
462 // This means that after the execution of this slow path, `ref` will
463 // always be up-to-date, but `obj.field` may not; i.e., after the
464 // flip, `ref` will be a to-space reference, but `obj.field` will
465 // probably still be a from-space reference (unless it gets updated by
466 // another thread, or if another thread installed another object
467 // reference (different from `ref`) in `obj.field`).
468 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
469  public:
470   ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
471                                 Location ref,
472                                 bool unpoison_ref_before_marking)
473       : SlowPathCode(instruction),
474         ref_(ref),
475         unpoison_ref_before_marking_(unpoison_ref_before_marking) {
476     DCHECK(kEmitCompilerReadBarrier);
477   }
478 
479   const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }
480 
481   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
482     LocationSummary* locations = instruction_->GetLocations();
483     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
484     Register ref_reg = ref_cpu_reg.AsRegister();
485     DCHECK(locations->CanCall());
486     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
487     DCHECK(instruction_->IsInstanceFieldGet() ||
488            instruction_->IsStaticFieldGet() ||
489            instruction_->IsArrayGet() ||
490            instruction_->IsArraySet() ||
491            instruction_->IsLoadClass() ||
492            instruction_->IsLoadString() ||
493            instruction_->IsInstanceOf() ||
494            instruction_->IsCheckCast() ||
495            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
496            (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
497         << "Unexpected instruction in read barrier marking slow path: "
498         << instruction_->DebugName();
499 
500     __ Bind(GetEntryLabel());
501     if (unpoison_ref_before_marking_) {
502       // Object* ref = ref_addr->AsMirrorPtr()
503       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
504     }
505     // No need to save live registers; it's taken care of by the
506     // entrypoint. Also, there is no need to update the stack mask,
507     // as this runtime call will not trigger a garbage collection.
508     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
509     DCHECK_NE(ref_reg, RSP);
510     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
511     // "Compact" slow path, saving two moves.
512     //
513     // Instead of using the standard runtime calling convention (input
514     // in RDI and output in RAX):
515     //
516     //   RDI <- ref
517     //   RAX <- ReadBarrierMark(RDI)
518     //   ref <- RAX
519     //
520     // we just use rX (the register containing `ref`) as input and output
521     // of a dedicated entrypoint:
522     //
523     //   rX <- ReadBarrierMarkRegX(rX)
524     //
525     int32_t entry_point_offset =
526         CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
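    // The Thread object holds one ReadBarrierMarkRegX entrypoint per core register, which is
    // why the offset is derived from `ref_reg`: the reference stays in its own register
    // across the call.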
527     // This runtime call does not require a stack map.
528     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
529     __ jmp(GetExitLabel());
530   }
531 
532  private:
533   // The location (register) of the marked object reference.
534   const Location ref_;
535   // Should the reference in `ref_` be unpoisoned prior to marking it?
536   const bool unpoison_ref_before_marking_;
537 
538   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
539 };
540 
541 // Slow path marking an object reference `ref` during a read barrier,
542 // and if needed, atomically updating the field `obj.field` in the
543 // object `obj` holding this reference after marking (contrary to
544 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update
545 // `obj.field`).
546 //
547 // This means that after the execution of this slow path, both `ref`
548 // and `obj.field` will be up-to-date; i.e., after the flip, both will
549 // hold the same to-space reference (unless another thread installed
550 // another object reference (different from `ref`) in `obj.field`).
551 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
552  public:
553   ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
554                                               Location ref,
555                                               CpuRegister obj,
556                                               const Address& field_addr,
557                                               bool unpoison_ref_before_marking,
558                                               CpuRegister temp1,
559                                               CpuRegister temp2)
560       : SlowPathCode(instruction),
561         ref_(ref),
562         obj_(obj),
563         field_addr_(field_addr),
564         unpoison_ref_before_marking_(unpoison_ref_before_marking),
565         temp1_(temp1),
566         temp2_(temp2) {
567     DCHECK(kEmitCompilerReadBarrier);
568   }
569 
570   const char* GetDescription() const OVERRIDE {
571     return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
572   }
573 
574   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
575     LocationSummary* locations = instruction_->GetLocations();
576     CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
577     Register ref_reg = ref_cpu_reg.AsRegister();
578     DCHECK(locations->CanCall());
579     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
580     // This slow path is only used by the UnsafeCASObject intrinsic.
581     DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
582         << "Unexpected instruction in read barrier marking and field updating slow path: "
583         << instruction_->DebugName();
584     DCHECK(instruction_->GetLocations()->Intrinsified());
585     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
586 
587     __ Bind(GetEntryLabel());
588     if (unpoison_ref_before_marking_) {
589       // Object* ref = ref_addr->AsMirrorPtr()
590       __ MaybeUnpoisonHeapReference(ref_cpu_reg);
591     }
592 
593     // Save the old (unpoisoned) reference.
594     __ movl(temp1_, ref_cpu_reg);
595 
596     // No need to save live registers; it's taken care of by the
597     // entrypoint. Also, there is no need to update the stack mask,
598     // as this runtime call will not trigger a garbage collection.
599     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
600     DCHECK_NE(ref_reg, RSP);
601     DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
602     // "Compact" slow path, saving two moves.
603     //
604     // Instead of using the standard runtime calling convention (input
605     // in RDI and output in RAX):
606     //
607     //   RDI <- ref
608     //   RAX <- ReadBarrierMark(RDI)
609     //   ref <- RAX
610     //
611     // we just use rX (the register containing `ref`) as input and output
612     // of a dedicated entrypoint:
613     //
614     //   rX <- ReadBarrierMarkRegX(rX)
615     //
616     int32_t entry_point_offset =
617         CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
618     // This runtime call does not require a stack map.
619     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
620 
621     // If the new reference is different from the old reference,
622     // update the field in the holder (`*field_addr`).
623     //
624     // Note that this field could also hold a different object, if
625     // another thread had concurrently changed it. In that case, the
626     // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
627     // operation below would abort the CAS, leaving the field as-is.
628     NearLabel done;
629     __ cmpl(temp1_, ref_cpu_reg);
630     __ j(kEqual, &done);
631 
632     // Update the holder's field atomically.  This may fail if another
633     // mutator thread updates it before us, but that is OK.  This is achieved
634     // using a strong compare-and-set (CAS) operation with relaxed
635     // memory synchronization ordering, where the expected value is
636     // the old reference and the desired value is the new reference.
637     // This operation is implemented with a 32-bit LOCK CMPXCHG
638     // instruction, which requires the expected value (the old
639     // reference) to be in EAX.  Save RAX beforehand, and move the
640     // expected value (stored in `temp1_`) into EAX.
641     __ movq(temp2_, CpuRegister(RAX));
642     __ movl(CpuRegister(RAX), temp1_);
643 
644     // Convenience aliases.
645     CpuRegister base = obj_;
646     CpuRegister expected = CpuRegister(RAX);
647     CpuRegister value = ref_cpu_reg;
648 
649     bool base_equals_value = (base.AsRegister() == value.AsRegister());
650     Register value_reg = ref_reg;
651     if (kPoisonHeapReferences) {
652       if (base_equals_value) {
653         // If `base` and `value` are the same register location, move
654         // `value_reg` to a temporary register.  This way, poisoning
655         // `value_reg` won't invalidate `base`.
656         value_reg = temp1_.AsRegister();
657         __ movl(CpuRegister(value_reg), base);
658       }
659 
660       // Check that the register allocator did not assign the location
661       // of `expected` (RAX) to `value` nor to `base`, so that heap
662       // poisoning (when enabled) works as intended below.
663       // - If `value` were equal to `expected`, both references would
664       //   be poisoned twice, meaning they would not be poisoned at
665       //   all, as heap poisoning uses address negation.
666       // - If `base` were equal to `expected`, poisoning `expected`
667       //   would invalidate `base`.
668       DCHECK_NE(value_reg, expected.AsRegister());
669       DCHECK_NE(base.AsRegister(), expected.AsRegister());
670 
671       __ PoisonHeapReference(expected);
672       __ PoisonHeapReference(CpuRegister(value_reg));
673     }
674 
675     __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
676 
677     // If heap poisoning is enabled, we need to unpoison the values
678     // that were poisoned earlier.
679     if (kPoisonHeapReferences) {
680       if (base_equals_value) {
681         // `value_reg` has been moved to a temporary register, no need
682         // to unpoison it.
683       } else {
684         __ UnpoisonHeapReference(CpuRegister(value_reg));
685       }
686       // No need to unpoison `expected` (RAX), as it is overwritten below.
687     }
688 
689     // Restore RAX.
690     __ movq(CpuRegister(RAX), temp2_);
691 
692     __ Bind(&done);
693     __ jmp(GetExitLabel());
694   }
695 
696  private:
697   // The location (register) of the marked object reference.
698   const Location ref_;
699   // The register containing the object holding the marked object reference field.
700   const CpuRegister obj_;
701   // The address of the marked reference field.  The base of this address must be `obj_`.
702   const Address field_addr_;
703 
704   // Should the reference in `ref_` be unpoisoned prior to marking it?
705   const bool unpoison_ref_before_marking_;
706 
707   const CpuRegister temp1_;
708   const CpuRegister temp2_;
709 
710   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
711 };
712 
713 // Slow path generating a read barrier for a heap reference.
714 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
715  public:
716   ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
717                                             Location out,
718                                             Location ref,
719                                             Location obj,
720                                             uint32_t offset,
721                                             Location index)
722       : SlowPathCode(instruction),
723         out_(out),
724         ref_(ref),
725         obj_(obj),
726         offset_(offset),
727         index_(index) {
728     DCHECK(kEmitCompilerReadBarrier);
729     // If `obj` is equal to `out` or `ref`, it means the initial
730     // object has been overwritten by (or after) the heap object
731     // reference load to be instrumented, e.g.:
732     //
733     //   __ movl(out, Address(out, offset));
734     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
735     //
736     // In that case, we have lost the information about the original
737     // object, and the emitted read barrier cannot work properly.
738     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
739     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
740   }
741 
742   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
743     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
744     LocationSummary* locations = instruction_->GetLocations();
745     CpuRegister reg_out = out_.AsRegister<CpuRegister>();
746     DCHECK(locations->CanCall());
747     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
748     DCHECK(instruction_->IsInstanceFieldGet() ||
749            instruction_->IsStaticFieldGet() ||
750            instruction_->IsArrayGet() ||
751            instruction_->IsInstanceOf() ||
752            instruction_->IsCheckCast() ||
753            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
754         << "Unexpected instruction in read barrier for heap reference slow path: "
755         << instruction_->DebugName();
756 
757     __ Bind(GetEntryLabel());
758     SaveLiveRegisters(codegen, locations);
759 
760     // We may have to change the index's value, but as `index_` is a
761     // constant member (like other "inputs" of this slow path),
762     // introduce a copy of it, `index`.
763     Location index = index_;
764     if (index_.IsValid()) {
765       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
766       if (instruction_->IsArrayGet()) {
767         // Compute real offset and store it in index_.
768         Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
769         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
770         if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
771           // We are about to change the value of `index_reg` (see the
772           // calls to art::x86_64::X86_64Assembler::shll and
773           // art::x86_64::X86_64Assembler::AddImmediate below), but it
774           // has not been saved by the previous call to
775           // art::SlowPathCode::SaveLiveRegisters, as it is a
776           // callee-save register --
777           // art::SlowPathCode::SaveLiveRegisters does not consider
778           // callee-save registers, as it has been designed with the
779           // assumption that callee-save registers are supposed to be
780           // handled by the called function.  So, as a callee-save
781           // register, `index_reg` _would_ eventually be saved onto
782           // the stack, but it would be too late: we would have
783           // changed its value earlier.  Therefore, we manually save
784           // it here into another freely available register,
785           // `free_reg`, chosen of course among the caller-save
786           // registers (as a callee-save `free_reg` register would
787           // exhibit the same problem).
788           //
789           // Note we could have requested a temporary register from
790           // the register allocator instead; but we prefer not to, as
791           // this is a slow path, and we know we can find a
792           // caller-save register that is available.
793           Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
794           __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
795           index_reg = free_reg;
796           index = Location::RegisterLocation(index_reg);
797         } else {
798           // The initial register stored in `index_` has already been
799           // saved in the call to art::SlowPathCode::SaveLiveRegisters
800           // (as it is not a callee-save register), so we can freely
801           // use it.
802         }
803         // Shifting the index value contained in `index_reg` by the
804         // scale factor (2) cannot overflow in practice, as the
805         // runtime is unable to allocate object arrays with a size
806         // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
807         __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
808         static_assert(
809             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
810             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
811         __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
812       } else {
813         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
814         // intrinsics, `index_` is not shifted by a scale factor of 2
815         // (as in the case of ArrayGet), as it is actually an offset
816         // to an object field within an object.
817         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
818         DCHECK(instruction_->GetLocations()->Intrinsified());
819         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
820                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
821             << instruction_->AsInvoke()->GetIntrinsic();
822         DCHECK_EQ(offset_, 0U);
823         DCHECK(index_.IsRegister());
824       }
825     }
826 
827     // We're moving two or three locations to locations that could
828     // overlap, so we need a parallel move resolver.
829     InvokeRuntimeCallingConvention calling_convention;
830     HParallelMove parallel_move(codegen->GetGraph()->GetArena());
831     parallel_move.AddMove(ref_,
832                           Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
833                           Primitive::kPrimNot,
834                           nullptr);
835     parallel_move.AddMove(obj_,
836                           Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
837                           Primitive::kPrimNot,
838                           nullptr);
839     if (index.IsValid()) {
840       parallel_move.AddMove(index,
841                             Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
842                             Primitive::kPrimInt,
843                             nullptr);
844       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
845     } else {
846       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
847       __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
848     }
849     x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
850                                   instruction_,
851                                   instruction_->GetDexPc(),
852                                   this);
853     CheckEntrypointTypes<
854         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
855     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
856 
857     RestoreLiveRegisters(codegen, locations);
858     __ jmp(GetExitLabel());
859   }
860 
861   const char* GetDescription() const OVERRIDE {
862     return "ReadBarrierForHeapReferenceSlowPathX86_64";
863   }
864 
865  private:
866   CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
867     size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
868     size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
869     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
870       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
871         return static_cast<CpuRegister>(i);
872       }
873     }
874     // We shall never fail to find a free caller-save register, as
875     // there are more than two core caller-save registers on x86-64
876     // (meaning it is possible to find one which is different from
877     // `ref` and `obj`).
878     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
879     LOG(FATAL) << "Could not find a free caller-save register";
880     UNREACHABLE();
881   }
882 
883   const Location out_;
884   const Location ref_;
885   const Location obj_;
886   const uint32_t offset_;
887   // An additional location containing an index to an array.
888   // Only used for HArrayGet and the UnsafeGetObject &
889   // UnsafeGetObjectVolatile intrinsics.
890   const Location index_;
891 
892   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
893 };
894 
895 // Slow path generating a read barrier for a GC root.
896 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
897  public:
898   ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
899       : SlowPathCode(instruction), out_(out), root_(root) {
900     DCHECK(kEmitCompilerReadBarrier);
901   }
902 
903   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
904     LocationSummary* locations = instruction_->GetLocations();
905     DCHECK(locations->CanCall());
906     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
907     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
908         << "Unexpected instruction in read barrier for GC root slow path: "
909         << instruction_->DebugName();
910 
911     __ Bind(GetEntryLabel());
912     SaveLiveRegisters(codegen, locations);
913 
914     InvokeRuntimeCallingConvention calling_convention;
915     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
916     x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
917     x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
918                                   instruction_,
919                                   instruction_->GetDexPc(),
920                                   this);
921     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
922     x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
923 
924     RestoreLiveRegisters(codegen, locations);
925     __ jmp(GetExitLabel());
926   }
927 
928   const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; }
929 
930  private:
931   const Location out_;
932   const Location root_;
933 
934   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
935 };
936 
937 #undef __
938 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
939 #define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
940 
941 inline Condition X86_64IntegerCondition(IfCondition cond) {
942   switch (cond) {
943     case kCondEQ: return kEqual;
944     case kCondNE: return kNotEqual;
945     case kCondLT: return kLess;
946     case kCondLE: return kLessEqual;
947     case kCondGT: return kGreater;
948     case kCondGE: return kGreaterEqual;
949     case kCondB:  return kBelow;
950     case kCondBE: return kBelowEqual;
951     case kCondA:  return kAbove;
952     case kCondAE: return kAboveEqual;
953   }
954   LOG(FATAL) << "Unreachable";
955   UNREACHABLE();
956 }
957 
958 // Maps FP condition to x86_64 name.
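// x86-64 FP compares (UCOMISS/UCOMISD) set the flags like an unsigned integer compare,
// which is why the unsigned condition codes (below/above) are used here.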
959 inline Condition X86_64FPCondition(IfCondition cond) {
960   switch (cond) {
961     case kCondEQ: return kEqual;
962     case kCondNE: return kNotEqual;
963     case kCondLT: return kBelow;
964     case kCondLE: return kBelowEqual;
965     case kCondGT: return kAbove;
966     case kCondGE: return kAboveEqual;
967     default:      break;  // should not happen
968   };
969   LOG(FATAL) << "Unreachable";
970   UNREACHABLE();
971 }
972 
973 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
974       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
975       HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
976   return desired_dispatch_info;
977 }
978 
979 Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
980                                                                      Location temp) {
981   // All registers are assumed to be correctly set up.
982   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
983   switch (invoke->GetMethodLoadKind()) {
984     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
985       // temp = thread->string_init_entrypoint
986       uint32_t offset =
987           GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
988       __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip */ true));
989       break;
990     }
991     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
992       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
993       break;
994     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
995       Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
996       break;
997     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
998       __ movq(temp.AsRegister<CpuRegister>(),
999               Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
1000       // Bind a new fixup label at the end of the "movq" insn.
1001       uint32_t offset = invoke->GetDexCacheArrayOffset();
1002       __ Bind(NewPcRelativeDexCacheArrayPatch(invoke->GetDexFileForPcRelativeDexCache(), offset));
1003       break;
1004     }
1005     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
1006       Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
1007       Register method_reg;
1008       CpuRegister reg = temp.AsRegister<CpuRegister>();
1009       if (current_method.IsRegister()) {
1010         method_reg = current_method.AsRegister<Register>();
1011       } else {
1012         DCHECK(invoke->GetLocations()->Intrinsified());
1013         DCHECK(!current_method.IsValid());
1014         method_reg = reg.AsRegister();
1015         __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1016       }
1017       // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
1018       __ movq(reg,
1019               Address(CpuRegister(method_reg),
1020                       ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue()));
1021       // temp = temp[index_in_cache];
1022       // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
1023       uint32_t index_in_cache = invoke->GetDexMethodIndex();
1024       __ movq(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
1025       break;
1026     }
1027   }
1028   return callee_method;
1029 }
1030 
1031 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
1032                                                      Location temp) {
1033   // All registers are assumed to be correctly set up.
1034   Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
1035 
1036   switch (invoke->GetCodePtrLocation()) {
1037     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
1038       __ call(&frame_entry_label_);
1039       break;
1040     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
1041       // (callee_method + offset_of_quick_compiled_code)()
1042       __ call(Address(callee_method.AsRegister<CpuRegister>(),
1043                       ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1044                           kX86_64PointerSize).SizeValue()));
1045       break;
1046   }
1047 
1048   DCHECK(!IsLeafMethod());
1049 }
1050 
1051 void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
1052   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
1053   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
1054       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
1055 
1056   // Use the calling convention instead of the location of the receiver, as
1057   // intrinsics may have put the receiver in a different register. In the intrinsics
1058   // slow path, the arguments have been moved to the right place, so here we are
1059   // guaranteed that the receiver is the first register of the calling convention.
1060   InvokeDexCallingConvention calling_convention;
1061   Register receiver = calling_convention.GetRegisterAt(0);
1062 
1063   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
1064   // /* HeapReference<Class> */ temp = receiver->klass_
1065   __ movl(temp, Address(CpuRegister(receiver), class_offset));
1066   MaybeRecordImplicitNullCheck(invoke);
1067   // Instead of simply (possibly) unpoisoning `temp` here, we should
1068   // emit a read barrier for the previous class reference load.
1069   // However this is not required in practice, as this is an
1070   // intermediate/temporary reference and because the current
1071   // concurrent copying collector keeps the from-space memory
1072   // intact/accessible until the end of the marking phase (the
1073     // concurrent copying collector may not keep it in the future).
1074   __ MaybeUnpoisonHeapReference(temp);
1075   // temp = temp->GetMethodAt(method_offset);
1076   __ movq(temp, Address(temp, method_offset));
1077   // call temp->GetEntryPoint();
1078   __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1079       kX86_64PointerSize).SizeValue()));
1080 }
1081 
1082 void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) {
1083   DCHECK(GetCompilerOptions().IsBootImage());
1084   string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
1085   __ Bind(&string_patches_.back().label);
1086 }
1087 
1088 void CodeGeneratorX86_64::RecordBootTypePatch(HLoadClass* load_class) {
1089   boot_image_type_patches_.emplace_back(load_class->GetDexFile(),
1090                                         load_class->GetTypeIndex().index_);
1091   __ Bind(&boot_image_type_patches_.back().label);
1092 }
1093 
1094 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
1095   type_bss_entry_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1096   return &type_bss_entry_patches_.back().label;
1097 }
1098 
1099 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
1100   DCHECK(!GetCompilerOptions().IsBootImage());
1101   string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
1102   return &string_patches_.back().label;
1103 }
1104 
1105 Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
1106                                                             uint32_t element_offset) {
1107   // Add a patch entry and return the label.
1108   pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset);
1109   return &pc_relative_dex_cache_patches_.back().label;
1110 }
1111 
1112 // The label points to the end of the "movl" or another instruction but the literal offset
1113 // for method patch needs to point to the embedded constant which occupies the last 4 bytes.
1114 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
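// For example, if the patched instruction ends (and its label is therefore bound) at
// assembler position 0x40, the embedded 32-bit constant starts at 0x40 - 4 = 0x3C, which
// is the literal_offset recorded for the linker below.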
1115 
1116 template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
1117 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1118     const ArenaDeque<PatchInfo<Label>>& infos,
1119     ArenaVector<LinkerPatch>* linker_patches) {
1120   for (const PatchInfo<Label>& info : infos) {
1121     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1122     linker_patches->push_back(
1123         Factory(literal_offset, &info.dex_file, info.label.Position(), info.index));
1124   }
1125 }
1126 
1127 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
1128   DCHECK(linker_patches->empty());
1129   size_t size =
1130       pc_relative_dex_cache_patches_.size() +
1131       string_patches_.size() +
1132       boot_image_type_patches_.size() +
1133       type_bss_entry_patches_.size();
1134   linker_patches->reserve(size);
1135   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
1136                                                                linker_patches);
1137   if (!GetCompilerOptions().IsBootImage()) {
1138     DCHECK(boot_image_type_patches_.empty());
1139     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
1140   } else {
1141     // These are always PC-relative, see GetSupportedLoadClassKind()/GetSupportedLoadStringKind().
1142     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_,
1143                                                                 linker_patches);
1144     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
1145   }
1146   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
1147                                                               linker_patches);
1148   DCHECK_EQ(size, linker_patches->size());
1149 }
1150 
1151 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1152   stream << Register(reg);
1153 }
1154 
1155 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1156   stream << FloatRegister(reg);
1157 }
1158 
1159 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1160   __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1161   return kX86_64WordSize;
1162 }
1163 
1164 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1165   __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1166   return kX86_64WordSize;
1167 }
1168 
1169 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1170   if (GetGraph()->HasSIMD()) {
1171     __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1172   } else {
1173     __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1174   }
1175   return GetFloatingPointSpillSlotSize();
1176 }
1177 
1178 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1179   if (GetGraph()->HasSIMD()) {
1180     __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1181   } else {
1182     __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1183   }
1184   return GetFloatingPointSpillSlotSize();
1185 }
1186 
1187 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1188                                         HInstruction* instruction,
1189                                         uint32_t dex_pc,
1190                                         SlowPathCode* slow_path) {
1191   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1192   GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1193   if (EntrypointRequiresStackMap(entrypoint)) {
1194     RecordPcInfo(instruction, dex_pc, slow_path);
1195   }
1196 }
1197 
1198 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1199                                                               HInstruction* instruction,
1200                                                               SlowPathCode* slow_path) {
1201   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1202   GenerateInvokeRuntime(entry_point_offset);
1203 }
1204 
1205 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
1206   __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
1207 }
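// Note (background, hedged): on x86-64 the managed Thread* is addressed through the gs segment,
// so the call above is roughly "call gs:[entry_point_offset]": an absolute, segment-relative
// address (hence /* no_rip */ true) that reads the Quick entrypoint slot out of the current
// Thread and calls it.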
1208 
1209 static constexpr int kNumberOfCpuRegisterPairs = 0;
1210 // Use a fake return address register to mimic Quick.
1211 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
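// A plausible reading of the fake register: it stands for the return-address slot pushed by the
// caller's "call", so registering it as an allocated core register below lets the shared
// frame-size and CFI bookkeeping count those 8 bytes, mimicking Quick's frame shape.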
1212 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1213                                          const X86_64InstructionSetFeatures& isa_features,
1214                                          const CompilerOptions& compiler_options,
1215                                          OptimizingCompilerStats* stats)
1216       : CodeGenerator(graph,
1217                       kNumberOfCpuRegisters,
1218                       kNumberOfFloatRegisters,
1219                       kNumberOfCpuRegisterPairs,
1220                       ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1221                                           arraysize(kCoreCalleeSaves))
1222                           | (1 << kFakeReturnRegister),
1223                       ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
1224                                           arraysize(kFpuCalleeSaves)),
1225                       compiler_options,
1226                       stats),
1227         block_labels_(nullptr),
1228         location_builder_(graph, this),
1229         instruction_visitor_(graph, this),
1230         move_resolver_(graph->GetArena(), this),
1231         assembler_(graph->GetArena()),
1232         isa_features_(isa_features),
1233         constant_area_start_(0),
1234         pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1235         string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1236         boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1237         type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1238         fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1239         jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1240         jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
1241   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1242 }
1243 
1244 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1245                                                                CodeGeneratorX86_64* codegen)
1246       : InstructionCodeGenerator(graph, codegen),
1247         assembler_(codegen->GetAssembler()),
1248         codegen_(codegen) {}
1249 
1250 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1251   // Stack register is always reserved.
1252   blocked_core_registers_[RSP] = true;
1253 
1254   // Block the register used as TMP.
1255   blocked_core_registers_[TMP] = true;
1256 }
1257 
1258 static dwarf::Reg DWARFReg(Register reg) {
1259   return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1260 }
1261 
1262 static dwarf::Reg DWARFReg(FloatRegister reg) {
1263   return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1264 }
1265 
1266 void CodeGeneratorX86_64::GenerateFrameEntry() {
1267   __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
1268   __ Bind(&frame_entry_label_);
1269   bool skip_overflow_check = IsLeafMethod()
1270       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1271   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1272 
1273   if (!skip_overflow_check) {
1274     __ testq(CpuRegister(RAX), Address(
1275         CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
1276     RecordPcInfo(nullptr, 0);
1277   }
1278 
1279   if (HasEmptyFrame()) {
1280     return;
1281   }
1282 
1283   for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1284     Register reg = kCoreCalleeSaves[i];
1285     if (allocated_registers_.ContainsCoreRegister(reg)) {
1286       __ pushq(CpuRegister(reg));
1287       __ cfi().AdjustCFAOffset(kX86_64WordSize);
1288       __ cfi().RelOffset(DWARFReg(reg), 0);
1289     }
1290   }
1291 
1292   int adjust = GetFrameSize() - GetCoreSpillSize();
1293   __ subq(CpuRegister(RSP), Immediate(adjust));
1294   __ cfi().AdjustCFAOffset(adjust);
1295   uint32_t xmm_spill_location = GetFpuSpillStart();
1296   size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1297 
1298   for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1299     if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1300       int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1301       __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1302       __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1303     }
1304   }
1305 
1306   if (GetGraph()->HasShouldDeoptimizeFlag()) {
1307     // Initialize should_deoptimize flag to 0.
1308     __ movl(Address(CpuRegister(RSP), xmm_spill_location - kShouldDeoptimizeFlagSize),
1309             Immediate(0));
1310   }
1311 
1312   // Save the current method if we need it. Note that we do not
1313   // do this in HCurrentMethod, as the instruction might have been removed
1314   // in the SSA graph.
1315   if (RequiresCurrentMethod()) {
1316     __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1317             CpuRegister(kMethodRegisterArgument));
1318   }
1319 }
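// Rough sketch of the frame laid out above (one possible layout, highest address first):
//   [return address]          pushed by the caller's call (initial CFA offset of 8)
//   [core callee-saves]       one pushq per allocated register in kCoreCalleeSaves
//   [subq RSP, adjust]        covers XMM callee-save slots, the ShouldDeoptimize flag (if
//                             present), regular spill slots and outgoing arguments
//   [ArtMethod*]              stored at [RSP + 0] when RequiresCurrentMethod()
// The testq at a negative RSP offset near the top is the implicit stack-overflow probe: it
// touches the guard region below the stack limit so an overflow faults here, where a stack map
// was recorded.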
1320 
1321 void CodeGeneratorX86_64::GenerateFrameExit() {
1322   __ cfi().RememberState();
1323   if (!HasEmptyFrame()) {
1324     uint32_t xmm_spill_location = GetFpuSpillStart();
1325     size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1326     for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1327       if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1328         int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1329         __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1330         __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1331       }
1332     }
1333 
1334     int adjust = GetFrameSize() - GetCoreSpillSize();
1335     __ addq(CpuRegister(RSP), Immediate(adjust));
1336     __ cfi().AdjustCFAOffset(-adjust);
1337 
1338     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1339       Register reg = kCoreCalleeSaves[i];
1340       if (allocated_registers_.ContainsCoreRegister(reg)) {
1341         __ popq(CpuRegister(reg));
1342         __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1343         __ cfi().Restore(DWARFReg(reg));
1344       }
1345     }
1346   }
1347   __ ret();
1348   __ cfi().RestoreState();
1349   __ cfi().DefCFAOffset(GetFrameSize());
1350 }
1351 
1352 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1353   __ Bind(GetLabelOf(block));
1354 }
1355 
1356 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1357   if (source.Equals(destination)) {
1358     return;
1359   }
1360   if (destination.IsRegister()) {
1361     CpuRegister dest = destination.AsRegister<CpuRegister>();
1362     if (source.IsRegister()) {
1363       __ movq(dest, source.AsRegister<CpuRegister>());
1364     } else if (source.IsFpuRegister()) {
1365       __ movd(dest, source.AsFpuRegister<XmmRegister>());
1366     } else if (source.IsStackSlot()) {
1367       __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1368     } else if (source.IsConstant()) {
1369       HConstant* constant = source.GetConstant();
1370       if (constant->IsLongConstant()) {
1371         Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1372       } else {
1373         Load32BitValue(dest, GetInt32ValueOf(constant));
1374       }
1375     } else {
1376       DCHECK(source.IsDoubleStackSlot());
1377       __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1378     }
1379   } else if (destination.IsFpuRegister()) {
1380     XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1381     if (source.IsRegister()) {
1382       __ movd(dest, source.AsRegister<CpuRegister>());
1383     } else if (source.IsFpuRegister()) {
1384       __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1385     } else if (source.IsConstant()) {
1386       HConstant* constant = source.GetConstant();
1387       int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1388       if (constant->IsFloatConstant()) {
1389         Load32BitValue(dest, static_cast<int32_t>(value));
1390       } else {
1391         Load64BitValue(dest, value);
1392       }
1393     } else if (source.IsStackSlot()) {
1394       __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1395     } else {
1396       DCHECK(source.IsDoubleStackSlot());
1397       __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1398     }
1399   } else if (destination.IsStackSlot()) {
1400     if (source.IsRegister()) {
1401       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1402               source.AsRegister<CpuRegister>());
1403     } else if (source.IsFpuRegister()) {
1404       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1405                source.AsFpuRegister<XmmRegister>());
1406     } else if (source.IsConstant()) {
1407       HConstant* constant = source.GetConstant();
1408       int32_t value = GetInt32ValueOf(constant);
1409       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1410     } else {
1411       DCHECK(source.IsStackSlot()) << source;
1412       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1413       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1414     }
1415   } else {
1416     DCHECK(destination.IsDoubleStackSlot());
1417     if (source.IsRegister()) {
1418       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1419               source.AsRegister<CpuRegister>());
1420     } else if (source.IsFpuRegister()) {
1421       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1422                source.AsFpuRegister<XmmRegister>());
1423     } else if (source.IsConstant()) {
1424       HConstant* constant = source.GetConstant();
1425       DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1426       int64_t value = GetInt64ValueOf(constant);
1427       Store64BitValueToStack(destination, value);
1428     } else {
1429       DCHECK(source.IsDoubleStackSlot());
1430       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1431       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1432     }
1433   }
1434 }
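// Note: x86-64 has no memory-to-memory mov, so the stack-slot to stack-slot cases above are
// staged through the reserved TMP register (blocked in SetupBlockedRegisters()) rather than
// requiring a scratch register from the allocator.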
1435 
1436 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1437   DCHECK(location.IsRegister());
1438   Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1439 }
1440 
1441 void CodeGeneratorX86_64::MoveLocation(
1442     Location dst, Location src, Primitive::Type dst_type ATTRIBUTE_UNUSED) {
1443   Move(dst, src);
1444 }
1445 
1446 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1447   if (location.IsRegister()) {
1448     locations->AddTemp(location);
1449   } else {
1450     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1451   }
1452 }
1453 
1454 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1455   DCHECK(!successor->IsExitBlock());
1456 
1457   HBasicBlock* block = got->GetBlock();
1458   HInstruction* previous = got->GetPrevious();
1459 
1460   HLoopInformation* info = block->GetLoopInformation();
1461   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1462     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1463     return;
1464   }
1465 
1466   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1467     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1468   }
1469   if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1470     __ jmp(codegen_->GetLabelOf(successor));
1471   }
1472 }
1473 
1474 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1475   got->SetLocations(nullptr);
1476 }
1477 
1478 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1479   HandleGoto(got, got->GetSuccessor());
1480 }
1481 
1482 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1483   try_boundary->SetLocations(nullptr);
1484 }
1485 
1486 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1487   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1488   if (!successor->IsExitBlock()) {
1489     HandleGoto(try_boundary, successor);
1490   }
1491 }
1492 
1493 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1494   exit->SetLocations(nullptr);
1495 }
1496 
1497 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1498 }
1499 
1500 template<class LabelType>
1501 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1502                                                      LabelType* true_label,
1503                                                      LabelType* false_label) {
1504   if (cond->IsFPConditionTrueIfNaN()) {
1505     __ j(kUnordered, true_label);
1506   } else if (cond->IsFPConditionFalseIfNaN()) {
1507     __ j(kUnordered, false_label);
1508   }
1509   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1510 }
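// Background for the NaN handling above: ucomiss/ucomisd signal "unordered" (a NaN operand) by
// setting ZF, PF and CF all to 1, and kUnordered jumps on PF. Dispatching the unordered case
// first routes NaN to the true or false target according to the condition's gt/lt bias before
// the ordinary X86_64FPCondition() jump is emitted.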
1511 
1512 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1513   LocationSummary* locations = condition->GetLocations();
1514 
1515   Location left = locations->InAt(0);
1516   Location right = locations->InAt(1);
1517   Primitive::Type type = condition->InputAt(0)->GetType();
1518   switch (type) {
1519     case Primitive::kPrimBoolean:
1520     case Primitive::kPrimByte:
1521     case Primitive::kPrimChar:
1522     case Primitive::kPrimShort:
1523     case Primitive::kPrimInt:
1524     case Primitive::kPrimNot: {
1525       codegen_->GenerateIntCompare(left, right);
1526       break;
1527     }
1528     case Primitive::kPrimLong: {
1529       codegen_->GenerateLongCompare(left, right);
1530       break;
1531     }
1532     case Primitive::kPrimFloat: {
1533       if (right.IsFpuRegister()) {
1534         __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1535       } else if (right.IsConstant()) {
1536         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1537                    codegen_->LiteralFloatAddress(
1538                      right.GetConstant()->AsFloatConstant()->GetValue()));
1539       } else {
1540         DCHECK(right.IsStackSlot());
1541         __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1542                    Address(CpuRegister(RSP), right.GetStackIndex()));
1543       }
1544       break;
1545     }
1546     case Primitive::kPrimDouble: {
1547       if (right.IsFpuRegister()) {
1548         __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1549       } else if (right.IsConstant()) {
1550         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1551                    codegen_->LiteralDoubleAddress(
1552                      right.GetConstant()->AsDoubleConstant()->GetValue()));
1553       } else {
1554         DCHECK(right.IsDoubleStackSlot());
1555         __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1556                    Address(CpuRegister(RSP), right.GetStackIndex()));
1557       }
1558       break;
1559     }
1560     default:
1561       LOG(FATAL) << "Unexpected condition type " << type;
1562   }
1563 }
1564 
1565 template<class LabelType>
1566 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
1567                                                                   LabelType* true_target_in,
1568                                                                   LabelType* false_target_in) {
1569   // Generated branching requires both targets to be explicit. If either of the
1570   // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1571   LabelType fallthrough_target;
1572   LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1573   LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1574 
1575   // Generate the comparison to set the CC.
1576   GenerateCompareTest(condition);
1577 
1578   // Now generate the correct jump(s).
1579   Primitive::Type type = condition->InputAt(0)->GetType();
1580   switch (type) {
1581     case Primitive::kPrimLong: {
1582       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1583       break;
1584     }
1585     case Primitive::kPrimFloat: {
1586       GenerateFPJumps(condition, true_target, false_target);
1587       break;
1588     }
1589     case Primitive::kPrimDouble: {
1590       GenerateFPJumps(condition, true_target, false_target);
1591       break;
1592     }
1593     default:
1594       LOG(FATAL) << "Unexpected condition type " << type;
1595   }
1596 
1597   if (false_target != &fallthrough_target) {
1598     __ jmp(false_target);
1599   }
1600 
1601   if (fallthrough_target.IsLinked()) {
1602     __ Bind(&fallthrough_target);
1603   }
1604 }
1605 
1606 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1607   // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS are only
1608   // reliable if `cond` was emitted strictly before `branch`. We also can't use the eflags for
1609   // FP conditions if they are materialized, due to the complex branching they require.
1610   return cond->IsCondition() &&
1611          cond->GetNext() == branch &&
1612          !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
1613 }
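// Hypothetical example of why the GetNext() check matters: if a parallel move is scheduled
// between the condition and the branch, materializing a zero in that move uses xorl and clobbers
// EFLAGS, so the flags can only be reused when the condition is the instruction immediately
// preceding the branch.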
1614 
1615 template<class LabelType>
1616 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
1617                                                            size_t condition_input_index,
1618                                                            LabelType* true_target,
1619                                                            LabelType* false_target) {
1620   HInstruction* cond = instruction->InputAt(condition_input_index);
1621 
1622   if (true_target == nullptr && false_target == nullptr) {
1623     // Nothing to do. The code always falls through.
1624     return;
1625   } else if (cond->IsIntConstant()) {
1626     // Constant condition, statically compared against "true" (integer value 1).
1627     if (cond->AsIntConstant()->IsTrue()) {
1628       if (true_target != nullptr) {
1629         __ jmp(true_target);
1630       }
1631     } else {
1632       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1633       if (false_target != nullptr) {
1634         __ jmp(false_target);
1635       }
1636     }
1637     return;
1638   }
1639 
1640   // The following code generates these patterns:
1641   //  (1) true_target == nullptr && false_target != nullptr
1642   //        - opposite condition true => branch to false_target
1643   //  (2) true_target != nullptr && false_target == nullptr
1644   //        - condition true => branch to true_target
1645   //  (3) true_target != nullptr && false_target != nullptr
1646   //        - condition true => branch to true_target
1647   //        - branch to false_target
1648   if (IsBooleanValueOrMaterializedCondition(cond)) {
1649     if (AreEflagsSetFrom(cond, instruction)) {
1650       if (true_target == nullptr) {
1651         __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
1652       } else {
1653         __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
1654       }
1655     } else {
1656       // Materialized condition, compare against 0.
1657       Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1658       if (lhs.IsRegister()) {
1659         __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
1660       } else {
1661         __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
1662       }
1663       if (true_target == nullptr) {
1664         __ j(kEqual, false_target);
1665       } else {
1666         __ j(kNotEqual, true_target);
1667       }
1668     }
1669   } else {
1670     // Condition has not been materialized, use its inputs as the
1671     // comparison and its condition as the branch condition.
1672     HCondition* condition = cond->AsCondition();
1673 
1674     // If this is a long or FP comparison that has been folded into
1675     // the HCondition, generate the comparison directly.
1676     Primitive::Type type = condition->InputAt(0)->GetType();
1677     if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
1678       GenerateCompareTestAndBranch(condition, true_target, false_target);
1679       return;
1680     }
1681 
1682     Location lhs = condition->GetLocations()->InAt(0);
1683     Location rhs = condition->GetLocations()->InAt(1);
1684     codegen_->GenerateIntCompare(lhs, rhs);
1685     if (true_target == nullptr) {
1686       __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
1687     } else {
1688       __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1689     }
1690   }
1691 
1692   // If neither branch falls through (case 3), the conditional branch to `true_target`
1693   // was already emitted (case 2) and we need to emit a jump to `false_target`.
1694   if (true_target != nullptr && false_target != nullptr) {
1695     __ jmp(false_target);
1696   }
1697 }
1698 
1699 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
1700   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
1701   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1702     locations->SetInAt(0, Location::Any());
1703   }
1704 }
1705 
1706 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
1707   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1708   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1709   Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1710       nullptr : codegen_->GetLabelOf(true_successor);
1711   Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1712       nullptr : codegen_->GetLabelOf(false_successor);
1713   GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
1714 }
1715 
1716 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1717   LocationSummary* locations = new (GetGraph()->GetArena())
1718       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1719   InvokeRuntimeCallingConvention calling_convention;
1720   RegisterSet caller_saves = RegisterSet::Empty();
1721   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1722   locations->SetCustomSlowPathCallerSaves(caller_saves);
1723   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1724     locations->SetInAt(0, Location::Any());
1725   }
1726 }
1727 
1728 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1729   SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
1730   GenerateTestAndBranch<Label>(deoptimize,
1731                                /* condition_input_index */ 0,
1732                                slow_path->GetEntryLabel(),
1733                                /* false_target */ nullptr);
1734 }
1735 
1736 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1737   LocationSummary* locations = new (GetGraph()->GetArena())
1738       LocationSummary(flag, LocationSummary::kNoCall);
1739   locations->SetOut(Location::RequiresRegister());
1740 }
1741 
1742 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1743   __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
1744           Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1745 }
1746 
1747 static bool SelectCanUseCMOV(HSelect* select) {
1748   // There are no conditional move instructions for XMMs.
1749   if (Primitive::IsFloatingPointType(select->GetType())) {
1750     return false;
1751   }
1752 
1753   // A FP condition doesn't generate the single CC that we need.
1754   HInstruction* condition = select->GetCondition();
1755   if (condition->IsCondition() &&
1756       Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())) {
1757     return false;
1758   }
1759 
1760   // We can generate a CMOV for this Select.
1761   return true;
1762 }
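// Rationale, as suggested by the checks above: a floating-point compare spreads its result across
// ZF/PF/CF and NaN requires a separate unordered (PF) test, so no single cmovcc condition
// captures the gt/lt-biased semantics. Such selects take the branch-based path in VisitSelect()
// instead.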
1763 
1764 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
1765   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
1766   if (Primitive::IsFloatingPointType(select->GetType())) {
1767     locations->SetInAt(0, Location::RequiresFpuRegister());
1768     locations->SetInAt(1, Location::Any());
1769   } else {
1770     locations->SetInAt(0, Location::RequiresRegister());
1771     if (SelectCanUseCMOV(select)) {
1772       if (select->InputAt(1)->IsConstant()) {
1773         locations->SetInAt(1, Location::RequiresRegister());
1774       } else {
1775         locations->SetInAt(1, Location::Any());
1776       }
1777     } else {
1778       locations->SetInAt(1, Location::Any());
1779     }
1780   }
1781   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1782     locations->SetInAt(2, Location::RequiresRegister());
1783   }
1784   locations->SetOut(Location::SameAsFirstInput());
1785 }
1786 
1787 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
1788   LocationSummary* locations = select->GetLocations();
1789   if (SelectCanUseCMOV(select)) {
1790     // If both the condition and the source types are integer, we can generate
1791     // a CMOV to implement Select.
1792     CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
1793     Location value_true_loc = locations->InAt(1);
1794     DCHECK(locations->InAt(0).Equals(locations->Out()));
1795 
1796     HInstruction* select_condition = select->GetCondition();
1797     Condition cond = kNotEqual;
1798 
1799     // Figure out how to test the 'condition'.
1800     if (select_condition->IsCondition()) {
1801       HCondition* condition = select_condition->AsCondition();
1802       if (!condition->IsEmittedAtUseSite()) {
1803         // This was a previously materialized condition.
1804         // Can we use the existing condition code?
1805         if (AreEflagsSetFrom(condition, select)) {
1806           // Materialization was the previous instruction.  Condition codes are right.
1807           cond = X86_64IntegerCondition(condition->GetCondition());
1808         } else {
1809           // No, we have to recreate the condition code.
1810           CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1811           __ testl(cond_reg, cond_reg);
1812         }
1813       } else {
1814         GenerateCompareTest(condition);
1815         cond = X86_64IntegerCondition(condition->GetCondition());
1816       }
1817     } else {
1818       // Must be a Boolean condition, which needs to be compared to 0.
1819       CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1820       __ testl(cond_reg, cond_reg);
1821     }
1822 
1823     // If the condition is true, overwrite the output, which already contains false.
1824     // Generate the correct sized CMOV.
1825     bool is_64_bit = Primitive::Is64BitType(select->GetType());
1826     if (value_true_loc.IsRegister()) {
1827       __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
1828     } else {
1829       __ cmov(cond,
1830               value_false,
1831               Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
1832     }
1833   } else {
1834     NearLabel false_target;
1835     GenerateTestAndBranch<NearLabel>(select,
1836                                      /* condition_input_index */ 2,
1837                                      /* true_target */ nullptr,
1838                                      &false_target);
1839     codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1840     __ Bind(&false_target);
1841   }
1842 }
1843 
1844 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1845   new (GetGraph()->GetArena()) LocationSummary(info);
1846 }
1847 
1848 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
1849   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1850 }
1851 
1852 void CodeGeneratorX86_64::GenerateNop() {
1853   __ nop();
1854 }
1855 
1856 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
1857   LocationSummary* locations =
1858       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
1859   // Handle the long/FP comparisons made in instruction simplification.
1860   switch (cond->InputAt(0)->GetType()) {
1861     case Primitive::kPrimLong:
1862       locations->SetInAt(0, Location::RequiresRegister());
1863       locations->SetInAt(1, Location::Any());
1864       break;
1865     case Primitive::kPrimFloat:
1866     case Primitive::kPrimDouble:
1867       locations->SetInAt(0, Location::RequiresFpuRegister());
1868       locations->SetInAt(1, Location::Any());
1869       break;
1870     default:
1871       locations->SetInAt(0, Location::RequiresRegister());
1872       locations->SetInAt(1, Location::Any());
1873       break;
1874   }
1875   if (!cond->IsEmittedAtUseSite()) {
1876     locations->SetOut(Location::RequiresRegister());
1877   }
1878 }
1879 
1880 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
1881   if (cond->IsEmittedAtUseSite()) {
1882     return;
1883   }
1884 
1885   LocationSummary* locations = cond->GetLocations();
1886   Location lhs = locations->InAt(0);
1887   Location rhs = locations->InAt(1);
1888   CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
1889   NearLabel true_label, false_label;
1890 
1891   switch (cond->InputAt(0)->GetType()) {
1892     default:
1893       // Integer case.
1894 
1895       // Clear output register: setcc only sets the low byte.
1896       __ xorl(reg, reg);
1897 
1898       codegen_->GenerateIntCompare(lhs, rhs);
1899       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1900       return;
1901     case Primitive::kPrimLong:
1902       // Clear output register: setcc only sets the low byte.
1903       __ xorl(reg, reg);
1904 
1905       codegen_->GenerateLongCompare(lhs, rhs);
1906       __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1907       return;
1908     case Primitive::kPrimFloat: {
1909       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1910       if (rhs.IsConstant()) {
1911         float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
1912         __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
1913       } else if (rhs.IsStackSlot()) {
1914         __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1915       } else {
1916         __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1917       }
1918       GenerateFPJumps(cond, &true_label, &false_label);
1919       break;
1920     }
1921     case Primitive::kPrimDouble: {
1922       XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1923       if (rhs.IsConstant()) {
1924         double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
1925         __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
1926       } else if (rhs.IsDoubleStackSlot()) {
1927         __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1928       } else {
1929         __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1930       }
1931       GenerateFPJumps(cond, &true_label, &false_label);
1932       break;
1933     }
1934   }
1935 
1936   // Convert the jumps into the result.
1937   NearLabel done_label;
1938 
1939   // False case: result = 0.
1940   __ Bind(&false_label);
1941   __ xorl(reg, reg);
1942   __ jmp(&done_label);
1943 
1944   // True case: result = 1.
1945   __ Bind(&true_label);
1946   __ movl(reg, Immediate(1));
1947   __ Bind(&done_label);
1948 }
1949 
1950 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
1951   HandleCondition(comp);
1952 }
1953 
1954 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
1955   HandleCondition(comp);
1956 }
1957 
1958 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
1959   HandleCondition(comp);
1960 }
1961 
1962 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
1963   HandleCondition(comp);
1964 }
1965 
1966 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
1967   HandleCondition(comp);
1968 }
1969 
1970 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
1971   HandleCondition(comp);
1972 }
1973 
1974 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1975   HandleCondition(comp);
1976 }
1977 
1978 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1979   HandleCondition(comp);
1980 }
1981 
1982 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
1983   HandleCondition(comp);
1984 }
1985 
1986 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
1987   HandleCondition(comp);
1988 }
1989 
1990 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1991   HandleCondition(comp);
1992 }
1993 
1994 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1995   HandleCondition(comp);
1996 }
1997 
1998 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
1999   HandleCondition(comp);
2000 }
2001 
2002 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2003   HandleCondition(comp);
2004 }
2005 
2006 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2007   HandleCondition(comp);
2008 }
2009 
2010 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2011   HandleCondition(comp);
2012 }
2013 
2014 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2015   HandleCondition(comp);
2016 }
2017 
2018 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2019   HandleCondition(comp);
2020 }
2021 
2022 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2023   HandleCondition(comp);
2024 }
2025 
2026 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2027   HandleCondition(comp);
2028 }
2029 
2030 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2031   LocationSummary* locations =
2032       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
2033   switch (compare->InputAt(0)->GetType()) {
2034     case Primitive::kPrimBoolean:
2035     case Primitive::kPrimByte:
2036     case Primitive::kPrimShort:
2037     case Primitive::kPrimChar:
2038     case Primitive::kPrimInt:
2039     case Primitive::kPrimLong: {
2040       locations->SetInAt(0, Location::RequiresRegister());
2041       locations->SetInAt(1, Location::Any());
2042       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2043       break;
2044     }
2045     case Primitive::kPrimFloat:
2046     case Primitive::kPrimDouble: {
2047       locations->SetInAt(0, Location::RequiresFpuRegister());
2048       locations->SetInAt(1, Location::Any());
2049       locations->SetOut(Location::RequiresRegister());
2050       break;
2051     }
2052     default:
2053       LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2054   }
2055 }
2056 
2057 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2058   LocationSummary* locations = compare->GetLocations();
2059   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2060   Location left = locations->InAt(0);
2061   Location right = locations->InAt(1);
2062 
2063   NearLabel less, greater, done;
2064   Primitive::Type type = compare->InputAt(0)->GetType();
2065   Condition less_cond = kLess;
2066 
2067   switch (type) {
2068     case Primitive::kPrimBoolean:
2069     case Primitive::kPrimByte:
2070     case Primitive::kPrimShort:
2071     case Primitive::kPrimChar:
2072     case Primitive::kPrimInt: {
2073       codegen_->GenerateIntCompare(left, right);
2074       break;
2075     }
2076     case Primitive::kPrimLong: {
2077       codegen_->GenerateLongCompare(left, right);
2078       break;
2079     }
2080     case Primitive::kPrimFloat: {
2081       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2082       if (right.IsConstant()) {
2083         float value = right.GetConstant()->AsFloatConstant()->GetValue();
2084         __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2085       } else if (right.IsStackSlot()) {
2086         __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2087       } else {
2088         __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2089       }
2090       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2091       less_cond = kBelow;  //  ucomis{s,d} sets CF
2092       break;
2093     }
2094     case Primitive::kPrimDouble: {
2095       XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2096       if (right.IsConstant()) {
2097         double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2098         __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2099       } else if (right.IsDoubleStackSlot()) {
2100         __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2101       } else {
2102         __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2103       }
2104       __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2105       less_cond = kBelow;  //  ucomis{s,d} sets CF
2106       break;
2107     }
2108     default:
2109       LOG(FATAL) << "Unexpected compare type " << type;
2110   }
2111 
2112   __ movl(out, Immediate(0));
2113   __ j(kEqual, &done);
2114   __ j(less_cond, &less);
2115 
2116   __ Bind(&greater);
2117   __ movl(out, Immediate(1));
2118   __ jmp(&done);
2119 
2120   __ Bind(&less);
2121   __ movl(out, Immediate(-1));
2122 
2123   __ Bind(&done);
2124 }
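// Result convention produced above: out is 0 if the operands are equal, 1 if left > right, and
// -1 if left < right. For FP inputs the unordered (NaN) case is steered to +1 or -1 by
// IsGtBias(), and kBelow is used rather than kLess because ucomiss/ucomisd report "less than"
// through CF, not the signed flags.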
2125 
2126 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2127   LocationSummary* locations =
2128       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2129   locations->SetOut(Location::ConstantLocation(constant));
2130 }
2131 
2132 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2133   // Will be generated at use site.
2134 }
2135 
2136 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2137   LocationSummary* locations =
2138       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2139   locations->SetOut(Location::ConstantLocation(constant));
2140 }
2141 
2142 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2143   // Will be generated at use site.
2144 }
2145 
2146 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2147   LocationSummary* locations =
2148       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2149   locations->SetOut(Location::ConstantLocation(constant));
2150 }
2151 
2152 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2153   // Will be generated at use site.
2154 }
2155 
2156 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2157   LocationSummary* locations =
2158       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2159   locations->SetOut(Location::ConstantLocation(constant));
2160 }
2161 
2162 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2163   // Will be generated at use site.
2164 }
2165 
2166 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2167   LocationSummary* locations =
2168       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2169   locations->SetOut(Location::ConstantLocation(constant));
2170 }
2171 
2172 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2173     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2174   // Will be generated at use site.
2175 }
2176 
2177 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2178   memory_barrier->SetLocations(nullptr);
2179 }
2180 
2181 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2182   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2183 }
2184 
2185 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2186   ret->SetLocations(nullptr);
2187 }
2188 
2189 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2190   codegen_->GenerateFrameExit();
2191 }
2192 
2193 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2194   LocationSummary* locations =
2195       new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall);
2196   switch (ret->InputAt(0)->GetType()) {
2197     case Primitive::kPrimBoolean:
2198     case Primitive::kPrimByte:
2199     case Primitive::kPrimChar:
2200     case Primitive::kPrimShort:
2201     case Primitive::kPrimInt:
2202     case Primitive::kPrimNot:
2203     case Primitive::kPrimLong:
2204       locations->SetInAt(0, Location::RegisterLocation(RAX));
2205       break;
2206 
2207     case Primitive::kPrimFloat:
2208     case Primitive::kPrimDouble:
2209       locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
2210       break;
2211 
2212     default:
2213       LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2214   }
2215 }
2216 
2217 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2218   if (kIsDebugBuild) {
2219     switch (ret->InputAt(0)->GetType()) {
2220       case Primitive::kPrimBoolean:
2221       case Primitive::kPrimByte:
2222       case Primitive::kPrimChar:
2223       case Primitive::kPrimShort:
2224       case Primitive::kPrimInt:
2225       case Primitive::kPrimNot:
2226       case Primitive::kPrimLong:
2227         DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2228         break;
2229 
2230       case Primitive::kPrimFloat:
2231       case Primitive::kPrimDouble:
2232         DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2233                   XMM0);
2234         break;
2235 
2236       default:
2237         LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2238     }
2239   }
2240   codegen_->GenerateFrameExit();
2241 }
2242 
2243 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(Primitive::Type type) const {
2244   switch (type) {
2245     case Primitive::kPrimBoolean:
2246     case Primitive::kPrimByte:
2247     case Primitive::kPrimChar:
2248     case Primitive::kPrimShort:
2249     case Primitive::kPrimInt:
2250     case Primitive::kPrimNot:
2251     case Primitive::kPrimLong:
2252       return Location::RegisterLocation(RAX);
2253 
2254     case Primitive::kPrimVoid:
2255       return Location::NoLocation();
2256 
2257     case Primitive::kPrimDouble:
2258     case Primitive::kPrimFloat:
2259       return Location::FpuRegisterLocation(XMM0);
2260   }
2261 
2262   UNREACHABLE();
2263 }
2264 
2265 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2266   return Location::RegisterLocation(kMethodRegisterArgument);
2267 }
2268 
2269 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) {
2270   switch (type) {
2271     case Primitive::kPrimBoolean:
2272     case Primitive::kPrimByte:
2273     case Primitive::kPrimChar:
2274     case Primitive::kPrimShort:
2275     case Primitive::kPrimInt:
2276     case Primitive::kPrimNot: {
2277       uint32_t index = gp_index_++;
2278       stack_index_++;
2279       if (index < calling_convention.GetNumberOfRegisters()) {
2280         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2281       } else {
2282         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2283       }
2284     }
2285 
2286     case Primitive::kPrimLong: {
2287       uint32_t index = gp_index_;
2288       stack_index_ += 2;
2289       if (index < calling_convention.GetNumberOfRegisters()) {
2290         gp_index_ += 1;
2291         return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2292       } else {
2293         gp_index_ += 2;
2294         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2295       }
2296     }
2297 
2298     case Primitive::kPrimFloat: {
2299       uint32_t index = float_index_++;
2300       stack_index_++;
2301       if (index < calling_convention.GetNumberOfFpuRegisters()) {
2302         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2303       } else {
2304         return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2305       }
2306     }
2307 
2308     case Primitive::kPrimDouble: {
2309       uint32_t index = float_index_++;
2310       stack_index_ += 2;
2311       if (index < calling_convention.GetNumberOfFpuRegisters()) {
2312         return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2313       } else {
2314         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2315       }
2316     }
2317 
2318     case Primitive::kPrimVoid:
2319       LOG(FATAL) << "Unexpected parameter type " << type;
2320       break;
2321   }
2322   return Location::NoLocation();
2323 }
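// Walk-through of the bookkeeping above (illustrative): stack_index_ counts 4-byte vreg slots, so
// longs and doubles advance it by two to keep later stack offsets correct, while gp_index_ and
// float_index_ count how many GP/FPU argument registers of InvokeDexCallingConvention have been
// handed out; once those run out, arguments are passed in StackSlot/DoubleStackSlot locations in
// the caller's outgoing-argument area.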
2324 
2325 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2326   // The trampoline uses the same calling convention as dex calling conventions,
2327   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2328   // the method_idx.
2329   HandleInvoke(invoke);
2330 }
2331 
2332 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2333   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2334 }
2335 
2336 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2337   // Explicit clinit checks triggered by static invokes must have been pruned by
2338   // art::PrepareForRegisterAllocation.
2339   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2340 
2341   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2342   if (intrinsic.TryDispatch(invoke)) {
2343     return;
2344   }
2345 
2346   HandleInvoke(invoke);
2347 }
2348 
2349 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2350   if (invoke->GetLocations()->Intrinsified()) {
2351     IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2352     intrinsic.Dispatch(invoke);
2353     return true;
2354   }
2355   return false;
2356 }
2357 
2358 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2359   // Explicit clinit checks triggered by static invokes must have been pruned by
2360   // art::PrepareForRegisterAllocation.
2361   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2362 
2363   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2364     return;
2365   }
2366 
2367   LocationSummary* locations = invoke->GetLocations();
2368   codegen_->GenerateStaticOrDirectCall(
2369       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2370   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2371 }
2372 
2373 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2374   InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2375   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2376 }
2377 
2378 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2379   IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2380   if (intrinsic.TryDispatch(invoke)) {
2381     return;
2382   }
2383 
2384   HandleInvoke(invoke);
2385 }
2386 
2387 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2388   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2389     return;
2390   }
2391 
2392   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2393   DCHECK(!codegen_->IsLeafMethod());
2394   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2395 }
2396 
2397 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2398   HandleInvoke(invoke);
2399   // Add the hidden argument.
2400   invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2401 }
2402 
2403 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2404   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2405   LocationSummary* locations = invoke->GetLocations();
2406   CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2407   CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
2408   Location receiver = locations->InAt(0);
2409   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2410 
2411   // Set the hidden argument. It is safe to do this here, as RAX
2412   // won't be modified thereafter, before the `call` instruction.
2413   DCHECK_EQ(RAX, hidden_reg.AsRegister());
2414   codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
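  // Note: if this IMT slot holds a conflict resolution stub (several interface methods
  // hashing to the same slot), that stub reads the method index from RAX to locate the
  // actual ArtMethod to invoke, which is why the hidden argument must survive until the call.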
2415 
2416   if (receiver.IsStackSlot()) {
2417     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2418     // /* HeapReference<Class> */ temp = temp->klass_
2419     __ movl(temp, Address(temp, class_offset));
2420   } else {
2421     // /* HeapReference<Class> */ temp = receiver->klass_
2422     __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2423   }
2424   codegen_->MaybeRecordImplicitNullCheck(invoke);
2425   // Instead of simply (possibly) unpoisoning `temp` here, we should
2426   // emit a read barrier for the previous class reference load.
2427   // However this is not required in practice, as this is an
2428   // intermediate/temporary reference and because the current
2429   // concurrent copying collector keeps the from-space memory
2430   // intact/accessible until the end of the marking phase (a future
2431   // concurrent copying collector may not provide this guarantee).
2432   __ MaybeUnpoisonHeapReference(temp);
2433   // temp = temp->GetAddressOfIMT()
2434   __ movq(temp,
2435       Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
2437   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2438       invoke->GetImtIndex(), kX86_64PointerSize));
2439   // temp = temp->GetImtEntryAt(method_offset);
2440   __ movq(temp, Address(temp, method_offset));
2441   // call temp->GetEntryPoint();
2442   __ call(Address(
2443       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
2444 
2445   DCHECK(!codegen_->IsLeafMethod());
2446   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2447 }
2448 
2449 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2450   HandleInvoke(invoke);
2451 }
2452 
2453 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2454   codegen_->GenerateInvokePolymorphicCall(invoke);
2455 }
2456 
2457 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
2458   LocationSummary* locations =
2459       new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
2460   switch (neg->GetResultType()) {
2461     case Primitive::kPrimInt:
2462     case Primitive::kPrimLong:
2463       locations->SetInAt(0, Location::RequiresRegister());
2464       locations->SetOut(Location::SameAsFirstInput());
2465       break;
2466 
2467     case Primitive::kPrimFloat:
2468     case Primitive::kPrimDouble:
2469       locations->SetInAt(0, Location::RequiresFpuRegister());
2470       locations->SetOut(Location::SameAsFirstInput());
2471       locations->AddTemp(Location::RequiresFpuRegister());
2472       break;
2473 
2474     default:
2475       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2476   }
2477 }
2478 
2479 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
2480   LocationSummary* locations = neg->GetLocations();
2481   Location out = locations->Out();
2482   Location in = locations->InAt(0);
2483   switch (neg->GetResultType()) {
2484     case Primitive::kPrimInt:
2485       DCHECK(in.IsRegister());
2486       DCHECK(in.Equals(out));
2487       __ negl(out.AsRegister<CpuRegister>());
2488       break;
2489 
2490     case Primitive::kPrimLong:
2491       DCHECK(in.IsRegister());
2492       DCHECK(in.Equals(out));
2493       __ negq(out.AsRegister<CpuRegister>());
2494       break;
2495 
2496     case Primitive::kPrimFloat: {
2497       DCHECK(in.Equals(out));
2498       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2499       // Implement float negation with an exclusive or with value
2500       // 0x80000000 (mask for bit 31, representing the sign of a
2501       // single-precision floating-point number).
2502       __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
2503       __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2504       break;
2505     }
2506 
2507     case Primitive::kPrimDouble: {
2508       DCHECK(in.Equals(out));
2509       XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2510       // Implement double negation with an exclusive or with value
2511       // 0x8000000000000000 (mask for bit 63, representing the sign of
2512       // a double-precision floating-point number).
2513       __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
2514       __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2515       break;
2516     }
2517 
2518     default:
2519       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2520   }
2521 }
2522 
2523 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2524   LocationSummary* locations =
2525       new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
2526   Primitive::Type result_type = conversion->GetResultType();
2527   Primitive::Type input_type = conversion->GetInputType();
2528   DCHECK_NE(result_type, input_type);
2529 
2530   // The Java language does not allow treating boolean as an integral type but
2531   // our bit representation makes it safe.
2532 
2533   switch (result_type) {
2534     case Primitive::kPrimByte:
2535       switch (input_type) {
2536         case Primitive::kPrimLong:
2537           // Type conversion from long to byte is a result of code transformations.
2538         case Primitive::kPrimBoolean:
2539           // Boolean input is a result of code transformations.
2540         case Primitive::kPrimShort:
2541         case Primitive::kPrimInt:
2542         case Primitive::kPrimChar:
2543           // Processing a Dex `int-to-byte' instruction.
2544           locations->SetInAt(0, Location::Any());
2545           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2546           break;
2547 
2548         default:
2549           LOG(FATAL) << "Unexpected type conversion from " << input_type
2550                      << " to " << result_type;
2551       }
2552       break;
2553 
2554     case Primitive::kPrimShort:
2555       switch (input_type) {
2556         case Primitive::kPrimLong:
2557           // Type conversion from long to short is a result of code transformations.
2558         case Primitive::kPrimBoolean:
2559           // Boolean input is a result of code transformations.
2560         case Primitive::kPrimByte:
2561         case Primitive::kPrimInt:
2562         case Primitive::kPrimChar:
2563           // Processing a Dex `int-to-short' instruction.
2564           locations->SetInAt(0, Location::Any());
2565           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2566           break;
2567 
2568         default:
2569           LOG(FATAL) << "Unexpected type conversion from " << input_type
2570                      << " to " << result_type;
2571       }
2572       break;
2573 
2574     case Primitive::kPrimInt:
2575       switch (input_type) {
2576         case Primitive::kPrimLong:
2577           // Processing a Dex `long-to-int' instruction.
2578           locations->SetInAt(0, Location::Any());
2579           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2580           break;
2581 
2582         case Primitive::kPrimFloat:
2583           // Processing a Dex `float-to-int' instruction.
2584           locations->SetInAt(0, Location::RequiresFpuRegister());
2585           locations->SetOut(Location::RequiresRegister());
2586           break;
2587 
2588         case Primitive::kPrimDouble:
2589           // Processing a Dex `double-to-int' instruction.
2590           locations->SetInAt(0, Location::RequiresFpuRegister());
2591           locations->SetOut(Location::RequiresRegister());
2592           break;
2593 
2594         default:
2595           LOG(FATAL) << "Unexpected type conversion from " << input_type
2596                      << " to " << result_type;
2597       }
2598       break;
2599 
2600     case Primitive::kPrimLong:
2601       switch (input_type) {
2602         case Primitive::kPrimBoolean:
2603           // Boolean input is a result of code transformations.
2604         case Primitive::kPrimByte:
2605         case Primitive::kPrimShort:
2606         case Primitive::kPrimInt:
2607         case Primitive::kPrimChar:
2608           // Processing a Dex `int-to-long' instruction.
2609           // TODO: We would benefit from a (to-be-implemented)
2610           // Location::RegisterOrStackSlot requirement for this input.
2611           locations->SetInAt(0, Location::RequiresRegister());
2612           locations->SetOut(Location::RequiresRegister());
2613           break;
2614 
2615         case Primitive::kPrimFloat:
2616           // Processing a Dex `float-to-long' instruction.
2617           locations->SetInAt(0, Location::RequiresFpuRegister());
2618           locations->SetOut(Location::RequiresRegister());
2619           break;
2620 
2621         case Primitive::kPrimDouble:
2622           // Processing a Dex `double-to-long' instruction.
2623           locations->SetInAt(0, Location::RequiresFpuRegister());
2624           locations->SetOut(Location::RequiresRegister());
2625           break;
2626 
2627         default:
2628           LOG(FATAL) << "Unexpected type conversion from " << input_type
2629                      << " to " << result_type;
2630       }
2631       break;
2632 
2633     case Primitive::kPrimChar:
2634       switch (input_type) {
2635         case Primitive::kPrimLong:
2636           // Type conversion from long to char is a result of code transformations.
2637         case Primitive::kPrimBoolean:
2638           // Boolean input is a result of code transformations.
2639         case Primitive::kPrimByte:
2640         case Primitive::kPrimShort:
2641         case Primitive::kPrimInt:
2642           // Processing a Dex `int-to-char' instruction.
2643           locations->SetInAt(0, Location::Any());
2644           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2645           break;
2646 
2647         default:
2648           LOG(FATAL) << "Unexpected type conversion from " << input_type
2649                      << " to " << result_type;
2650       }
2651       break;
2652 
2653     case Primitive::kPrimFloat:
2654       switch (input_type) {
2655         case Primitive::kPrimBoolean:
2656           // Boolean input is a result of code transformations.
2657         case Primitive::kPrimByte:
2658         case Primitive::kPrimShort:
2659         case Primitive::kPrimInt:
2660         case Primitive::kPrimChar:
2661           // Processing a Dex `int-to-float' instruction.
2662           locations->SetInAt(0, Location::Any());
2663           locations->SetOut(Location::RequiresFpuRegister());
2664           break;
2665 
2666         case Primitive::kPrimLong:
2667           // Processing a Dex `long-to-float' instruction.
2668           locations->SetInAt(0, Location::Any());
2669           locations->SetOut(Location::RequiresFpuRegister());
2670           break;
2671 
2672         case Primitive::kPrimDouble:
2673           // Processing a Dex `double-to-float' instruction.
2674           locations->SetInAt(0, Location::Any());
2675           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2676           break;
2677 
2678         default:
2679           LOG(FATAL) << "Unexpected type conversion from " << input_type
2680                      << " to " << result_type;
2681       }
2682       break;
2683 
2684     case Primitive::kPrimDouble:
2685       switch (input_type) {
2686         case Primitive::kPrimBoolean:
2687           // Boolean input is a result of code transformations.
2688         case Primitive::kPrimByte:
2689         case Primitive::kPrimShort:
2690         case Primitive::kPrimInt:
2691         case Primitive::kPrimChar:
2692           // Processing a Dex `int-to-double' instruction.
2693           locations->SetInAt(0, Location::Any());
2694           locations->SetOut(Location::RequiresFpuRegister());
2695           break;
2696 
2697         case Primitive::kPrimLong:
2698           // Processing a Dex `long-to-double' instruction.
2699           locations->SetInAt(0, Location::Any());
2700           locations->SetOut(Location::RequiresFpuRegister());
2701           break;
2702 
2703         case Primitive::kPrimFloat:
2704           // Processing a Dex `float-to-double' instruction.
2705           locations->SetInAt(0, Location::Any());
2706           locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2707           break;
2708 
2709         default:
2710           LOG(FATAL) << "Unexpected type conversion from " << input_type
2711                      << " to " << result_type;
2712       }
2713       break;
2714 
2715     default:
2716       LOG(FATAL) << "Unexpected type conversion from " << input_type
2717                  << " to " << result_type;
2718   }
2719 }
2720 
2721 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2722   LocationSummary* locations = conversion->GetLocations();
2723   Location out = locations->Out();
2724   Location in = locations->InAt(0);
2725   Primitive::Type result_type = conversion->GetResultType();
2726   Primitive::Type input_type = conversion->GetInputType();
2727   DCHECK_NE(result_type, input_type);
2728   switch (result_type) {
2729     case Primitive::kPrimByte:
2730       switch (input_type) {
2731         case Primitive::kPrimLong:
2732           // Type conversion from long to byte is a result of code transformations.
2733         case Primitive::kPrimBoolean:
2734           // Boolean input is a result of code transformations.
2735         case Primitive::kPrimShort:
2736         case Primitive::kPrimInt:
2737         case Primitive::kPrimChar:
2738           // Processing a Dex `int-to-byte' instruction.
2739           if (in.IsRegister()) {
2740             __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2741           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2742             __ movsxb(out.AsRegister<CpuRegister>(),
2743                       Address(CpuRegister(RSP), in.GetStackIndex()));
2744           } else {
2745             __ movl(out.AsRegister<CpuRegister>(),
2746                     Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
2747           }
2748           break;
2749 
2750         default:
2751           LOG(FATAL) << "Unexpected type conversion from " << input_type
2752                      << " to " << result_type;
2753       }
2754       break;
2755 
2756     case Primitive::kPrimShort:
2757       switch (input_type) {
2758         case Primitive::kPrimLong:
2759           // Type conversion from long to short is a result of code transformations.
2760         case Primitive::kPrimBoolean:
2761           // Boolean input is a result of code transformations.
2762         case Primitive::kPrimByte:
2763         case Primitive::kPrimInt:
2764         case Primitive::kPrimChar:
2765           // Processing a Dex `int-to-short' instruction.
2766           if (in.IsRegister()) {
2767             __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2768           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2769             __ movsxw(out.AsRegister<CpuRegister>(),
2770                       Address(CpuRegister(RSP), in.GetStackIndex()));
2771           } else {
2772             __ movl(out.AsRegister<CpuRegister>(),
2773                     Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
2774           }
2775           break;
2776 
2777         default:
2778           LOG(FATAL) << "Unexpected type conversion from " << input_type
2779                      << " to " << result_type;
2780       }
2781       break;
2782 
2783     case Primitive::kPrimInt:
2784       switch (input_type) {
2785         case Primitive::kPrimLong:
2786           // Processing a Dex `long-to-int' instruction.
2787           if (in.IsRegister()) {
2788             __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2789           } else if (in.IsDoubleStackSlot()) {
2790             __ movl(out.AsRegister<CpuRegister>(),
2791                     Address(CpuRegister(RSP), in.GetStackIndex()));
2792           } else {
2793             DCHECK(in.IsConstant());
2794             DCHECK(in.GetConstant()->IsLongConstant());
2795             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2796             __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
2797           }
2798           break;
2799 
2800         case Primitive::kPrimFloat: {
2801           // Processing a Dex `float-to-int' instruction.
2802           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2803           CpuRegister output = out.AsRegister<CpuRegister>();
2804           NearLabel done, nan;
2805 
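          // Match Java's narrowing rules: NaN converts to 0 and anything at or above
          // (float)INT_MAX saturates to INT_MAX. The truncating cvttss2si below already
          // produces 0x80000000 (== INT_MIN) for inputs at or below INT_MIN, so the
          // negative overflow case needs no special handling.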
2806           __ movl(output, Immediate(kPrimIntMax));
2807           // if input >= (float)INT_MAX goto done
2808           __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
2809           __ j(kAboveEqual, &done);
2810           // if input == NaN goto nan
2811           __ j(kUnordered, &nan);
2812           // output = float-to-int-truncate(input)
2813           __ cvttss2si(output, input, false);
2814           __ jmp(&done);
2815           __ Bind(&nan);
2816           //  output = 0
2817           __ xorl(output, output);
2818           __ Bind(&done);
2819           break;
2820         }
2821 
2822         case Primitive::kPrimDouble: {
2823           // Processing a Dex `double-to-int' instruction.
2824           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2825           CpuRegister output = out.AsRegister<CpuRegister>();
2826           NearLabel done, nan;
2827 
2828           __ movl(output, Immediate(kPrimIntMax));
2829           // if input >= (double)INT_MAX goto done
2830           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
2831           __ j(kAboveEqual, &done);
2832           // if input == NaN goto nan
2833           __ j(kUnordered, &nan);
2834           // output = double-to-int-truncate(input)
2835           __ cvttsd2si(output, input);
2836           __ jmp(&done);
2837           __ Bind(&nan);
2838           //  output = 0
2839           __ xorl(output, output);
2840           __ Bind(&done);
2841           break;
2842         }
2843 
2844         default:
2845           LOG(FATAL) << "Unexpected type conversion from " << input_type
2846                      << " to " << result_type;
2847       }
2848       break;
2849 
2850     case Primitive::kPrimLong:
2851       DCHECK(out.IsRegister());
2852       switch (input_type) {
2853         case Primitive::kPrimBoolean:
2854           // Boolean input is a result of code transformations.
2855         case Primitive::kPrimByte:
2856         case Primitive::kPrimShort:
2857         case Primitive::kPrimInt:
2858         case Primitive::kPrimChar:
2859           // Processing a Dex `int-to-long' instruction.
2860           DCHECK(in.IsRegister());
2861           __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2862           break;
2863 
2864         case Primitive::kPrimFloat: {
2865           // Processing a Dex `float-to-long' instruction.
2866           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2867           CpuRegister output = out.AsRegister<CpuRegister>();
2868           NearLabel done, nan;
2869 
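          // Same scheme as float-to-int above, but saturating at LONG_MAX and relying on
          // the 64-bit cvttss2si producing 0x8000000000000000 (== LONG_MIN) for inputs at
          // or below LONG_MIN.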
2870           codegen_->Load64BitValue(output, kPrimLongMax);
2871           // if input >= (float)LONG_MAX goto done
2872           __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
2873           __ j(kAboveEqual, &done);
2874           // if input == NaN goto nan
2875           __ j(kUnordered, &nan);
2876           // output = float-to-long-truncate(input)
2877           __ cvttss2si(output, input, true);
2878           __ jmp(&done);
2879           __ Bind(&nan);
2880           //  output = 0
2881           __ xorl(output, output);
2882           __ Bind(&done);
2883           break;
2884         }
2885 
2886         case Primitive::kPrimDouble: {
2887           // Processing a Dex `double-to-long' instruction.
2888           XmmRegister input = in.AsFpuRegister<XmmRegister>();
2889           CpuRegister output = out.AsRegister<CpuRegister>();
2890           NearLabel done, nan;
2891 
2892           codegen_->Load64BitValue(output, kPrimLongMax);
2893           // if input >= (double)LONG_MAX goto done
2894           __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax));
2895           __ j(kAboveEqual, &done);
2896           // if input == NaN goto nan
2897           __ j(kUnordered, &nan);
2898           // output = double-to-long-truncate(input)
2899           __ cvttsd2si(output, input, true);
2900           __ jmp(&done);
2901           __ Bind(&nan);
2902           //  output = 0
2903           __ xorl(output, output);
2904           __ Bind(&done);
2905           break;
2906         }
2907 
2908         default:
2909           LOG(FATAL) << "Unexpected type conversion from " << input_type
2910                      << " to " << result_type;
2911       }
2912       break;
2913 
2914     case Primitive::kPrimChar:
2915       switch (input_type) {
2916         case Primitive::kPrimLong:
2917           // Type conversion from long to char is a result of code transformations.
2918         case Primitive::kPrimBoolean:
2919           // Boolean input is a result of code transformations.
2920         case Primitive::kPrimByte:
2921         case Primitive::kPrimShort:
2922         case Primitive::kPrimInt:
2923           // Processing a Dex `int-to-char' instruction.
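          // char is an unsigned 16-bit type, so unlike int-to-short (movsxw, sign
          // extension) this narrowing conversion zero-extends with movzxw.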
2924           if (in.IsRegister()) {
2925             __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2926           } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2927             __ movzxw(out.AsRegister<CpuRegister>(),
2928                       Address(CpuRegister(RSP), in.GetStackIndex()));
2929           } else {
2930             __ movl(out.AsRegister<CpuRegister>(),
2931                     Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
2932           }
2933           break;
2934 
2935         default:
2936           LOG(FATAL) << "Unexpected type conversion from " << input_type
2937                      << " to " << result_type;
2938       }
2939       break;
2940 
2941     case Primitive::kPrimFloat:
2942       switch (input_type) {
2943         case Primitive::kPrimBoolean:
2944           // Boolean input is a result of code transformations.
2945         case Primitive::kPrimByte:
2946         case Primitive::kPrimShort:
2947         case Primitive::kPrimInt:
2948         case Primitive::kPrimChar:
2949           // Processing a Dex `int-to-float' instruction.
2950           if (in.IsRegister()) {
2951             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
2952           } else if (in.IsConstant()) {
2953             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
2954             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2955             codegen_->Load32BitValue(dest, static_cast<float>(v));
2956           } else {
2957             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2958                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
2959           }
2960           break;
2961 
2962         case Primitive::kPrimLong:
2963           // Processing a Dex `long-to-float' instruction.
2964           if (in.IsRegister()) {
2965             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
2966           } else if (in.IsConstant()) {
2967             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
2968             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2969             codegen_->Load32BitValue(dest, static_cast<float>(v));
2970           } else {
2971             __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2972                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
2973           }
2974           break;
2975 
2976         case Primitive::kPrimDouble:
2977           // Processing a Dex `double-to-float' instruction.
2978           if (in.IsFpuRegister()) {
2979             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2980           } else if (in.IsConstant()) {
2981             double v = in.GetConstant()->AsDoubleConstant()->GetValue();
2982             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2983             codegen_->Load32BitValue(dest, static_cast<float>(v));
2984           } else {
2985             __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
2986                         Address(CpuRegister(RSP), in.GetStackIndex()));
2987           }
2988           break;
2989 
2990         default:
2991           LOG(FATAL) << "Unexpected type conversion from " << input_type
2992                      << " to " << result_type;
2993       }
2994       break;
2995 
2996     case Primitive::kPrimDouble:
2997       switch (input_type) {
2998         case Primitive::kPrimBoolean:
2999           // Boolean input is a result of code transformations.
3000         case Primitive::kPrimByte:
3001         case Primitive::kPrimShort:
3002         case Primitive::kPrimInt:
3003         case Primitive::kPrimChar:
3004           // Processing a Dex `int-to-double' instruction.
3005           if (in.IsRegister()) {
3006             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3007           } else if (in.IsConstant()) {
3008             int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3009             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3010             codegen_->Load64BitValue(dest, static_cast<double>(v));
3011           } else {
3012             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3013                         Address(CpuRegister(RSP), in.GetStackIndex()), false);
3014           }
3015           break;
3016 
3017         case Primitive::kPrimLong:
3018           // Processing a Dex `long-to-double' instruction.
3019           if (in.IsRegister()) {
3020             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3021           } else if (in.IsConstant()) {
3022             int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3023             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3024             codegen_->Load64BitValue(dest, static_cast<double>(v));
3025           } else {
3026             __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3027                         Address(CpuRegister(RSP), in.GetStackIndex()), true);
3028           }
3029           break;
3030 
3031         case Primitive::kPrimFloat:
3032           // Processing a Dex `float-to-double' instruction.
3033           if (in.IsFpuRegister()) {
3034             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3035           } else if (in.IsConstant()) {
3036             float v = in.GetConstant()->AsFloatConstant()->GetValue();
3037             XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3038             codegen_->Load64BitValue(dest, static_cast<double>(v));
3039           } else {
3040             __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3041                         Address(CpuRegister(RSP), in.GetStackIndex()));
3042           }
3043           break;
3044 
3045         default:
3046           LOG(FATAL) << "Unexpected type conversion from " << input_type
3047                      << " to " << result_type;
3048       }
3049       break;
3050 
3051     default:
3052       LOG(FATAL) << "Unexpected type conversion from " << input_type
3053                  << " to " << result_type;
3054   }
3055 }
3056 
3057 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3058   LocationSummary* locations =
3059       new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
3060   switch (add->GetResultType()) {
3061     case Primitive::kPrimInt: {
3062       locations->SetInAt(0, Location::RequiresRegister());
3063       locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3064       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3065       break;
3066     }
3067 
3068     case Primitive::kPrimLong: {
3069       locations->SetInAt(0, Location::RequiresRegister());
3070       // We can use a leaq or addq if the constant can fit in an immediate.
3071       locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3072       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3073       break;
3074     }
3075 
3076     case Primitive::kPrimDouble:
3077     case Primitive::kPrimFloat: {
3078       locations->SetInAt(0, Location::RequiresFpuRegister());
3079       locations->SetInAt(1, Location::Any());
3080       locations->SetOut(Location::SameAsFirstInput());
3081       break;
3082     }
3083 
3084     default:
3085       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3086   }
3087 }
3088 
3089 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3090   LocationSummary* locations = add->GetLocations();
3091   Location first = locations->InAt(0);
3092   Location second = locations->InAt(1);
3093   Location out = locations->Out();
3094 
3095   switch (add->GetResultType()) {
3096     case Primitive::kPrimInt: {
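      // When the output register aliases one of the inputs a plain addl is enough; otherwise
      // leal computes first + second in a single instruction without clobbering either input.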
3097       if (second.IsRegister()) {
3098         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3099           __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3100         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3101           __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3102         } else {
3103           __ leal(out.AsRegister<CpuRegister>(), Address(
3104               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3105         }
3106       } else if (second.IsConstant()) {
3107         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3108           __ addl(out.AsRegister<CpuRegister>(),
3109                   Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3110         } else {
3111           __ leal(out.AsRegister<CpuRegister>(), Address(
3112               first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3113         }
3114       } else {
3115         DCHECK(first.Equals(locations->Out()));
3116         __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3117       }
3118       break;
3119     }
3120 
3121     case Primitive::kPrimLong: {
3122       if (second.IsRegister()) {
3123         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3124           __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3125         } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3126           __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3127         } else {
3128           __ leaq(out.AsRegister<CpuRegister>(), Address(
3129               first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3130         }
3131       } else {
3132         DCHECK(second.IsConstant());
3133         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3134         int32_t int32_value = Low32Bits(value);
3135         DCHECK_EQ(int32_value, value);
3136         if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3137           __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3138         } else {
3139           __ leaq(out.AsRegister<CpuRegister>(), Address(
3140               first.AsRegister<CpuRegister>(), int32_value));
3141         }
3142       }
3143       break;
3144     }
3145 
3146     case Primitive::kPrimFloat: {
3147       if (second.IsFpuRegister()) {
3148         __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3149       } else if (second.IsConstant()) {
3150         __ addss(first.AsFpuRegister<XmmRegister>(),
3151                  codegen_->LiteralFloatAddress(
3152                      second.GetConstant()->AsFloatConstant()->GetValue()));
3153       } else {
3154         DCHECK(second.IsStackSlot());
3155         __ addss(first.AsFpuRegister<XmmRegister>(),
3156                  Address(CpuRegister(RSP), second.GetStackIndex()));
3157       }
3158       break;
3159     }
3160 
3161     case Primitive::kPrimDouble: {
3162       if (second.IsFpuRegister()) {
3163         __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3164       } else if (second.IsConstant()) {
3165         __ addsd(first.AsFpuRegister<XmmRegister>(),
3166                  codegen_->LiteralDoubleAddress(
3167                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3168       } else {
3169         DCHECK(second.IsDoubleStackSlot());
3170         __ addsd(first.AsFpuRegister<XmmRegister>(),
3171                  Address(CpuRegister(RSP), second.GetStackIndex()));
3172       }
3173       break;
3174     }
3175 
3176     default:
3177       LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3178   }
3179 }
3180 
3181 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3182   LocationSummary* locations =
3183       new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall);
3184   switch (sub->GetResultType()) {
3185     case Primitive::kPrimInt: {
3186       locations->SetInAt(0, Location::RequiresRegister());
3187       locations->SetInAt(1, Location::Any());
3188       locations->SetOut(Location::SameAsFirstInput());
3189       break;
3190     }
3191     case Primitive::kPrimLong: {
3192       locations->SetInAt(0, Location::RequiresRegister());
3193       locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3194       locations->SetOut(Location::SameAsFirstInput());
3195       break;
3196     }
3197     case Primitive::kPrimFloat:
3198     case Primitive::kPrimDouble: {
3199       locations->SetInAt(0, Location::RequiresFpuRegister());
3200       locations->SetInAt(1, Location::Any());
3201       locations->SetOut(Location::SameAsFirstInput());
3202       break;
3203     }
3204     default:
3205       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3206   }
3207 }
3208 
3209 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3210   LocationSummary* locations = sub->GetLocations();
3211   Location first = locations->InAt(0);
3212   Location second = locations->InAt(1);
3213   DCHECK(first.Equals(locations->Out()));
3214   switch (sub->GetResultType()) {
3215     case Primitive::kPrimInt: {
3216       if (second.IsRegister()) {
3217         __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3218       } else if (second.IsConstant()) {
3219         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3220         __ subl(first.AsRegister<CpuRegister>(), imm);
3221       } else {
3222         __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3223       }
3224       break;
3225     }
3226     case Primitive::kPrimLong: {
3227       if (second.IsConstant()) {
3228         int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3229         DCHECK(IsInt<32>(value));
3230         __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3231       } else {
3232         __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3233       }
3234       break;
3235     }
3236 
3237     case Primitive::kPrimFloat: {
3238       if (second.IsFpuRegister()) {
3239         __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3240       } else if (second.IsConstant()) {
3241         __ subss(first.AsFpuRegister<XmmRegister>(),
3242                  codegen_->LiteralFloatAddress(
3243                      second.GetConstant()->AsFloatConstant()->GetValue()));
3244       } else {
3245         DCHECK(second.IsStackSlot());
3246         __ subss(first.AsFpuRegister<XmmRegister>(),
3247                  Address(CpuRegister(RSP), second.GetStackIndex()));
3248       }
3249       break;
3250     }
3251 
3252     case Primitive::kPrimDouble: {
3253       if (second.IsFpuRegister()) {
3254         __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3255       } else if (second.IsConstant()) {
3256         __ subsd(first.AsFpuRegister<XmmRegister>(),
3257                  codegen_->LiteralDoubleAddress(
3258                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3259       } else {
3260         DCHECK(second.IsDoubleStackSlot());
3261         __ subsd(first.AsFpuRegister<XmmRegister>(),
3262                  Address(CpuRegister(RSP), second.GetStackIndex()));
3263       }
3264       break;
3265     }
3266 
3267     default:
3268       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3269   }
3270 }
3271 
3272 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3273   LocationSummary* locations =
3274       new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
3275   switch (mul->GetResultType()) {
3276     case Primitive::kPrimInt: {
3277       locations->SetInAt(0, Location::RequiresRegister());
3278       locations->SetInAt(1, Location::Any());
3279       if (mul->InputAt(1)->IsIntConstant()) {
3280         // Can use 3 operand multiply.
3281         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3282       } else {
3283         locations->SetOut(Location::SameAsFirstInput());
3284       }
3285       break;
3286     }
3287     case Primitive::kPrimLong: {
3288       locations->SetInAt(0, Location::RequiresRegister());
3289       locations->SetInAt(1, Location::Any());
3290       if (mul->InputAt(1)->IsLongConstant() &&
3291           IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3292         // Can use 3 operand multiply.
3293         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3294       } else {
3295         locations->SetOut(Location::SameAsFirstInput());
3296       }
3297       break;
3298     }
3299     case Primitive::kPrimFloat:
3300     case Primitive::kPrimDouble: {
3301       locations->SetInAt(0, Location::RequiresFpuRegister());
3302       locations->SetInAt(1, Location::Any());
3303       locations->SetOut(Location::SameAsFirstInput());
3304       break;
3305     }
3306 
3307     default:
3308       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3309   }
3310 }
3311 
3312 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3313   LocationSummary* locations = mul->GetLocations();
3314   Location first = locations->InAt(0);
3315   Location second = locations->InAt(1);
3316   Location out = locations->Out();
3317   switch (mul->GetResultType()) {
3318     case Primitive::kPrimInt:
3319       // The constant may have ended up in a register, so test explicitly to avoid
3320       // problems where the output may not be the same as the first operand.
3321       if (mul->InputAt(1)->IsIntConstant()) {
3322         Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3323         __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3324       } else if (second.IsRegister()) {
3325         DCHECK(first.Equals(out));
3326         __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3327       } else {
3328         DCHECK(first.Equals(out));
3329         DCHECK(second.IsStackSlot());
3330         __ imull(first.AsRegister<CpuRegister>(),
3331                  Address(CpuRegister(RSP), second.GetStackIndex()));
3332       }
3333       break;
3334     case Primitive::kPrimLong: {
3335       // The constant may have ended up in a register, so test explicitly to avoid
3336       // problems where the output may not be the same as the first operand.
3337       if (mul->InputAt(1)->IsLongConstant()) {
3338         int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3339         if (IsInt<32>(value)) {
3340           __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3341                    Immediate(static_cast<int32_t>(value)));
3342         } else {
3343           // Have to use the constant area.
3344           DCHECK(first.Equals(out));
3345           __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3346         }
3347       } else if (second.IsRegister()) {
3348         DCHECK(first.Equals(out));
3349         __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3350       } else {
3351         DCHECK(second.IsDoubleStackSlot());
3352         DCHECK(first.Equals(out));
3353         __ imulq(first.AsRegister<CpuRegister>(),
3354                  Address(CpuRegister(RSP), second.GetStackIndex()));
3355       }
3356       break;
3357     }
3358 
3359     case Primitive::kPrimFloat: {
3360       DCHECK(first.Equals(out));
3361       if (second.IsFpuRegister()) {
3362         __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3363       } else if (second.IsConstant()) {
3364         __ mulss(first.AsFpuRegister<XmmRegister>(),
3365                  codegen_->LiteralFloatAddress(
3366                      second.GetConstant()->AsFloatConstant()->GetValue()));
3367       } else {
3368         DCHECK(second.IsStackSlot());
3369         __ mulss(first.AsFpuRegister<XmmRegister>(),
3370                  Address(CpuRegister(RSP), second.GetStackIndex()));
3371       }
3372       break;
3373     }
3374 
3375     case Primitive::kPrimDouble: {
3376       DCHECK(first.Equals(out));
3377       if (second.IsFpuRegister()) {
3378         __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3379       } else if (second.IsConstant()) {
3380         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3381                  codegen_->LiteralDoubleAddress(
3382                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3383       } else {
3384         DCHECK(second.IsDoubleStackSlot());
3385         __ mulsd(first.AsFpuRegister<XmmRegister>(),
3386                  Address(CpuRegister(RSP), second.GetStackIndex()));
3387       }
3388       break;
3389     }
3390 
3391     default:
3392       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3393   }
3394 }
3395 
3396 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3397                                                      uint32_t stack_adjustment, bool is_float) {
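  // Helper for GenerateRemFP: the operand may live in an XMM register, a stack slot or a
  // constant, but the x87 fprem instruction only works on the FP stack, so spill to the
  // reserved stack temporary when necessary and load it with flds/fldl.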
3398   if (source.IsStackSlot()) {
3399     DCHECK(is_float);
3400     __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3401   } else if (source.IsDoubleStackSlot()) {
3402     DCHECK(!is_float);
3403     __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3404   } else {
3405     // Write the value to the temporary location on the stack and load to FP stack.
3406     if (is_float) {
3407       Location stack_temp = Location::StackSlot(temp_offset);
3408       codegen_->Move(stack_temp, source);
3409       __ flds(Address(CpuRegister(RSP), temp_offset));
3410     } else {
3411       Location stack_temp = Location::DoubleStackSlot(temp_offset);
3412       codegen_->Move(stack_temp, source);
3413       __ fldl(Address(CpuRegister(RSP), temp_offset));
3414     }
3415   }
3416 }
3417 
3418 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
3419   Primitive::Type type = rem->GetResultType();
3420   bool is_float = type == Primitive::kPrimFloat;
3421   size_t elem_size = Primitive::ComponentSize(type);
3422   LocationSummary* locations = rem->GetLocations();
3423   Location first = locations->InAt(0);
3424   Location second = locations->InAt(1);
3425   Location out = locations->Out();
3426 
3427   // Create stack space for 2 elements.
3428   // TODO: enhance register allocator to ask for stack temporaries.
3429   __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3430 
3431   // Load the values to the FP stack in reverse order, using temporaries if needed.
3432   PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3433   PushOntoFPStack(first, 0, 2 * elem_size, is_float);
3434 
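  // fprem (truncating division) is used rather than fprem1 (IEEE round-to-nearest) because
  // Java defines the floating-point remainder in terms of truncating division, i.e. like
  // C's fmod(). fprem only performs a partial reduction (at most 63 exponent bits per
  // iteration), hence the retry loop below.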
3435   // Loop doing FPREM until we stabilize.
3436   NearLabel retry;
3437   __ Bind(&retry);
3438   __ fprem();
3439 
3440   // Move FP status to AX.
3441   __ fstsw();
3442 
3443   // Check whether the argument reduction is complete, which is signaled by the
3444   // C2 FPU status flag being cleared.
3445   __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3446   __ j(kNotEqual, &retry);
3447 
3448   // We have settled on the final value. Retrieve it into an XMM register.
3449   // Store FP top of stack to real stack.
3450   if (is_float) {
3451     __ fsts(Address(CpuRegister(RSP), 0));
3452   } else {
3453     __ fstl(Address(CpuRegister(RSP), 0));
3454   }
3455 
3456   // Pop the 2 items from the FP stack.
3457   __ fucompp();
3458 
3459   // Load the value from the stack into an XMM register.
3460   DCHECK(out.IsFpuRegister()) << out;
3461   if (is_float) {
3462     __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3463   } else {
3464     __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3465   }
3466 
3467   // And remove the temporary stack space we allocated.
3468   __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3469 }
3470 
3471 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3472   DCHECK(instruction->IsDiv() || instruction->IsRem());
3473 
3474   LocationSummary* locations = instruction->GetLocations();
3475   Location second = locations->InAt(1);
3476   DCHECK(second.IsConstant());
3477 
3478   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3479   CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3480   int64_t imm = Int64FromConstant(second.GetConstant());
3481 
3482   DCHECK(imm == 1 || imm == -1);
3483 
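  // No division instruction is needed: x / 1 == x, x / -1 == -x, and x % 1 == x % -1 == 0.
  // Note that negating INT_MIN/LONG_MIN wraps back to itself, which matches Java's defined
  // result for dividing the minimum value by -1.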
3484   switch (instruction->GetResultType()) {
3485     case Primitive::kPrimInt: {
3486       if (instruction->IsRem()) {
3487         __ xorl(output_register, output_register);
3488       } else {
3489         __ movl(output_register, input_register);
3490         if (imm == -1) {
3491           __ negl(output_register);
3492         }
3493       }
3494       break;
3495     }
3496 
3497     case Primitive::kPrimLong: {
3498       if (instruction->IsRem()) {
3499         __ xorl(output_register, output_register);
3500       } else {
3501         __ movq(output_register, input_register);
3502         if (imm == -1) {
3503           __ negq(output_register);
3504         }
3505       }
3506       break;
3507     }
3508 
3509     default:
3510       LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
3511   }
3512 }
3513 
3514 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
3515   LocationSummary* locations = instruction->GetLocations();
3516   Location second = locations->InAt(1);
3517 
3518   CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3519   CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3520 
3521   int64_t imm = Int64FromConstant(second.GetConstant());
3522   DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3523   uint64_t abs_imm = AbsOrMin(imm);
3524 
3525   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3526 
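  // Divide by +/-2^k while rounding toward zero: bias negative numerators by (abs_imm - 1)
  // before the arithmetic shift, e.g. for imm == 4, -5 becomes (-5 + 3) >> 2 == -1, whereas
  // 5 stays 5 >> 2 == 1. The cmov keeps the unbiased numerator when it is non-negative, and
  // the result is negated at the end if the divisor is negative.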
3527   if (instruction->GetResultType() == Primitive::kPrimInt) {
3528     __ leal(tmp, Address(numerator, abs_imm - 1));
3529     __ testl(numerator, numerator);
3530     __ cmov(kGreaterEqual, tmp, numerator);
3531     int shift = CTZ(imm);
3532     __ sarl(tmp, Immediate(shift));
3533 
3534     if (imm < 0) {
3535       __ negl(tmp);
3536     }
3537 
3538     __ movl(output_register, tmp);
3539   } else {
3540     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
3541     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3542 
3543     codegen_->Load64BitValue(rdx, abs_imm - 1);
3544     __ addq(rdx, numerator);
3545     __ testq(numerator, numerator);
3546     __ cmov(kGreaterEqual, rdx, numerator);
3547     int shift = CTZ(imm);
3548     __ sarq(rdx, Immediate(shift));
3549 
3550     if (imm < 0) {
3551       __ negq(rdx);
3552     }
3553 
3554     __ movq(output_register, rdx);
3555   }
3556 }
3557 
3558 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3559   DCHECK(instruction->IsDiv() || instruction->IsRem());
3560 
3561   LocationSummary* locations = instruction->GetLocations();
3562   Location second = locations->InAt(1);
3563 
3564   CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
3565       : locations->GetTemp(0).AsRegister<CpuRegister>();
3566   CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
3567   CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
3568       : locations->Out().AsRegister<CpuRegister>();
3569   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3570 
3571   DCHECK_EQ(RAX, eax.AsRegister());
3572   DCHECK_EQ(RDX, edx.AsRegister());
3573   if (instruction->IsDiv()) {
3574     DCHECK_EQ(RAX, out.AsRegister());
3575   } else {
3576     DCHECK_EQ(RDX, out.AsRegister());
3577   }
3578 
3579   int64_t magic;
3580   int shift;
3581 
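  // Both branches below implement the standard "magic number" algorithm for signed division
  // by a constant (cf. Hacker's Delight, 10-4/10-5): take the high half of magic * numerator,
  // correct by +/- numerator when magic and the divisor have opposite signs, shift right by
  // `shift`, then add the sign bit so the quotient rounds toward zero. For Rem, the quotient
  // is multiplied back and subtracted from the saved numerator.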
3582   // TODO: can these branches be written as one?
3583   if (instruction->GetResultType() == Primitive::kPrimInt) {
3584     int imm = second.GetConstant()->AsIntConstant()->GetValue();
3585 
3586     CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
3587 
3588     __ movl(numerator, eax);
3589 
3590     __ movl(eax, Immediate(magic));
3591     __ imull(numerator);
3592 
3593     if (imm > 0 && magic < 0) {
3594       __ addl(edx, numerator);
3595     } else if (imm < 0 && magic > 0) {
3596       __ subl(edx, numerator);
3597     }
3598 
3599     if (shift != 0) {
3600       __ sarl(edx, Immediate(shift));
3601     }
3602 
3603     __ movl(eax, edx);
3604     __ shrl(edx, Immediate(31));
3605     __ addl(edx, eax);
3606 
3607     if (instruction->IsRem()) {
3608       __ movl(eax, numerator);
3609       __ imull(edx, Immediate(imm));
3610       __ subl(eax, edx);
3611       __ movl(edx, eax);
3612     } else {
3613       __ movl(eax, edx);
3614     }
3615   } else {
3616     int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
3617 
3618     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
3619 
3620     CpuRegister rax = eax;
3621     CpuRegister rdx = edx;
3622 
3623     CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift);
3624 
3625     // Save the numerator.
3626     __ movq(numerator, rax);
3627 
3628     // RAX = magic
3629     codegen_->Load64BitValue(rax, magic);
3630 
3631     // RDX:RAX = magic * numerator
3632     __ imulq(numerator);
3633 
3634     if (imm > 0 && magic < 0) {
3635       // RDX += numerator
3636       __ addq(rdx, numerator);
3637     } else if (imm < 0 && magic > 0) {
3638       // RDX -= numerator
3639       __ subq(rdx, numerator);
3640     }
3641 
3642     // Shift if needed.
3643     if (shift != 0) {
3644       __ sarq(rdx, Immediate(shift));
3645     }
3646 
3647     // RDX += 1 if RDX < 0
3648     __ movq(rax, rdx);
3649     __ shrq(rdx, Immediate(63));
3650     __ addq(rdx, rax);
3651 
3652     if (instruction->IsRem()) {
3653       __ movq(rax, numerator);
3654 
3655       if (IsInt<32>(imm)) {
3656         __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
3657       } else {
3658         __ imulq(rdx, codegen_->LiteralInt64Address(imm));
3659       }
3660 
3661       __ subq(rax, rdx);
3662       __ movq(rdx, rax);
3663     } else {
3664       __ movq(rax, rdx);
3665     }
3666   }
3667 }
3668 
3669 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3670   DCHECK(instruction->IsDiv() || instruction->IsRem());
3671   Primitive::Type type = instruction->GetResultType();
3672   DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
3673 
3674   bool is_div = instruction->IsDiv();
3675   LocationSummary* locations = instruction->GetLocations();
3676 
3677   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3678   Location second = locations->InAt(1);
3679 
3680   DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
3681   DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
3682 
3683   if (second.IsConstant()) {
3684     int64_t imm = Int64FromConstant(second.GetConstant());
3685 
3686     if (imm == 0) {
3687       // Do not generate anything. DivZeroCheck would prevent any code from being executed.
3688     } else if (imm == 1 || imm == -1) {
3689       DivRemOneOrMinusOne(instruction);
3690     } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
3691       DivByPowerOfTwo(instruction->AsDiv());
3692     } else {
3693       DCHECK(imm <= -2 || imm >= 2);
3694       GenerateDivRemWithAnyConstant(instruction);
3695     }
3696   } else {
3697     SlowPathCode* slow_path =
3698         new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
3699             instruction, out.AsRegister(), type, is_div);
3700     codegen_->AddSlowPath(slow_path);
3701 
3702     CpuRegister second_reg = second.AsRegister<CpuRegister>();
3703     // 0x80000000(00000000)/-1 triggers an arithmetic exception!
3704     // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000),
3705     // so it's safe to just use negl instead of more complex comparisons.
3706     if (type == Primitive::kPrimInt) {
3707       __ cmpl(second_reg, Immediate(-1));
3708       __ j(kEqual, slow_path->GetEntryLabel());
3709       // edx:eax <- sign-extended of eax
3710       __ cdq();
3711       // eax = quotient, edx = remainder
3712       __ idivl(second_reg);
3713     } else {
3714       __ cmpq(second_reg, Immediate(-1));
3715       __ j(kEqual, slow_path->GetEntryLabel());
3716       // rdx:rax <- sign-extended of rax
3717       __ cqo();
3718       // rax = quotient, rdx = remainder
3719       __ idivq(second_reg);
3720     }
3721     __ Bind(slow_path->GetExitLabel());
3722   }
3723 }
3724 
3725 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
3726   LocationSummary* locations =
3727       new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
3728   switch (div->GetResultType()) {
3729     case Primitive::kPrimInt:
3730     case Primitive::kPrimLong: {
3731       locations->SetInAt(0, Location::RegisterLocation(RAX));
3732       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3733       locations->SetOut(Location::SameAsFirstInput());
3734       // Intel uses edx:eax (rdx:rax for long) as the dividend.
3735       locations->AddTemp(Location::RegisterLocation(RDX));
3736       // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
3737       // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
3738       // output and request another temp.
3739       if (div->InputAt(1)->IsConstant()) {
3740         locations->AddTemp(Location::RequiresRegister());
3741       }
3742       break;
3743     }
3744 
3745     case Primitive::kPrimFloat:
3746     case Primitive::kPrimDouble: {
3747       locations->SetInAt(0, Location::RequiresFpuRegister());
3748       locations->SetInAt(1, Location::Any());
3749       locations->SetOut(Location::SameAsFirstInput());
3750       break;
3751     }
3752 
3753     default:
3754       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3755   }
3756 }
3757 
3758 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
3759   LocationSummary* locations = div->GetLocations();
3760   Location first = locations->InAt(0);
3761   Location second = locations->InAt(1);
3762   DCHECK(first.Equals(locations->Out()));
3763 
3764   Primitive::Type type = div->GetResultType();
3765   switch (type) {
3766     case Primitive::kPrimInt:
3767     case Primitive::kPrimLong: {
3768       GenerateDivRemIntegral(div);
3769       break;
3770     }
3771 
3772     case Primitive::kPrimFloat: {
3773       if (second.IsFpuRegister()) {
3774         __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3775       } else if (second.IsConstant()) {
3776         __ divss(first.AsFpuRegister<XmmRegister>(),
3777                  codegen_->LiteralFloatAddress(
3778                      second.GetConstant()->AsFloatConstant()->GetValue()));
3779       } else {
3780         DCHECK(second.IsStackSlot());
3781         __ divss(first.AsFpuRegister<XmmRegister>(),
3782                  Address(CpuRegister(RSP), second.GetStackIndex()));
3783       }
3784       break;
3785     }
3786 
3787     case Primitive::kPrimDouble: {
3788       if (second.IsFpuRegister()) {
3789         __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3790       } else if (second.IsConstant()) {
3791         __ divsd(first.AsFpuRegister<XmmRegister>(),
3792                  codegen_->LiteralDoubleAddress(
3793                      second.GetConstant()->AsDoubleConstant()->GetValue()));
3794       } else {
3795         DCHECK(second.IsDoubleStackSlot());
3796         __ divsd(first.AsFpuRegister<XmmRegister>(),
3797                  Address(CpuRegister(RSP), second.GetStackIndex()));
3798       }
3799       break;
3800     }
3801 
3802     default:
3803       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3804   }
3805 }
3806 
3807 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
3808   Primitive::Type type = rem->GetResultType();
3809   LocationSummary* locations =
3810     new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
3811 
3812   switch (type) {
3813     case Primitive::kPrimInt:
3814     case Primitive::kPrimLong: {
3815       locations->SetInAt(0, Location::RegisterLocation(RAX));
3816       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3817       // Intel uses rdx:rax as the dividend and puts the remainder in rdx.
3818       locations->SetOut(Location::RegisterLocation(RDX));
3819       // We need to save the numerator while we tweak RAX and RDX. As the constant-divisor path
3820       // uses imul, which forces its result into RDX:RAX, things are simpler if we produce the
3821       // remainder directly in RDX as the output and request another temp for the numerator.
3822       if (rem->InputAt(1)->IsConstant()) {
3823         locations->AddTemp(Location::RequiresRegister());
3824       }
3825       break;
3826     }
3827 
3828     case Primitive::kPrimFloat:
3829     case Primitive::kPrimDouble: {
3830       locations->SetInAt(0, Location::Any());
3831       locations->SetInAt(1, Location::Any());
3832       locations->SetOut(Location::RequiresFpuRegister());
3833       locations->AddTemp(Location::RegisterLocation(RAX));
3834       break;
3835     }
3836 
3837     default:
3838       LOG(FATAL) << "Unexpected rem type " << type;
3839   }
3840 }
3841 
3842 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
3843   Primitive::Type type = rem->GetResultType();
3844   switch (type) {
3845     case Primitive::kPrimInt:
3846     case Primitive::kPrimLong: {
3847       GenerateDivRemIntegral(rem);
3848       break;
3849     }
3850     case Primitive::kPrimFloat:
3851     case Primitive::kPrimDouble: {
3852       GenerateRemFP(rem);
3853       break;
3854     }
3855     default:
3856       LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
3857   }
3858 }
3859 
3860 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3861   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3862   locations->SetInAt(0, Location::Any());
3863 }
3864 
3865 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3866   SlowPathCode* slow_path =
3867       new (GetGraph()->GetArena()) DivZeroCheckSlowPathX86_64(instruction);
3868   codegen_->AddSlowPath(slow_path);
3869 
3870   LocationSummary* locations = instruction->GetLocations();
3871   Location value = locations->InAt(0);
3872 
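  // Note that when the divisor is a constant this check folds away: a non-zero constant emits no
  // code at all, and a constant zero becomes an unconditional jump to the slow path.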
3873   switch (instruction->GetType()) {
3874     case Primitive::kPrimBoolean:
3875     case Primitive::kPrimByte:
3876     case Primitive::kPrimChar:
3877     case Primitive::kPrimShort:
3878     case Primitive::kPrimInt: {
3879       if (value.IsRegister()) {
3880         __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3881         __ j(kEqual, slow_path->GetEntryLabel());
3882       } else if (value.IsStackSlot()) {
3883         __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3884         __ j(kEqual, slow_path->GetEntryLabel());
3885       } else {
3886         DCHECK(value.IsConstant()) << value;
3887         if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
3888           __ jmp(slow_path->GetEntryLabel());
3889         }
3890       }
3891       break;
3892     }
3893     case Primitive::kPrimLong: {
3894       if (value.IsRegister()) {
3895         __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3896         __ j(kEqual, slow_path->GetEntryLabel());
3897       } else if (value.IsDoubleStackSlot()) {
3898         __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3899         __ j(kEqual, slow_path->GetEntryLabel());
3900       } else {
3901         DCHECK(value.IsConstant()) << value;
3902         if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
3903           __ jmp(slow_path->GetEntryLabel());
3904         }
3905       }
3906       break;
3907     }
3908     default:
3909       LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
3910   }
3911 }
3912 
3913 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
3914   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3915 
3916   LocationSummary* locations =
3917       new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
3918 
3919   switch (op->GetResultType()) {
3920     case Primitive::kPrimInt:
3921     case Primitive::kPrimLong: {
3922       locations->SetInAt(0, Location::RequiresRegister());
3923       // The shift count needs to be in CL.
3924       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
3925       locations->SetOut(Location::SameAsFirstInput());
3926       break;
3927     }
3928     default:
3929       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3930   }
3931 }
3932 
3933 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
3934   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3935 
3936   LocationSummary* locations = op->GetLocations();
3937   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3938   Location second = locations->InAt(1);
3939 
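  // Java shift semantics only use the low 5 (int) or 6 (long) bits of the shift count, which is
  // also what shl/sar/shr do with a count in CL; constant counts are masked explicitly below with
  // kMaxIntShiftDistance/kMaxLongShiftDistance. Illustrative example: for an int value,
  // `x << 33` is emitted as a shift by 1.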
3940   switch (op->GetResultType()) {
3941     case Primitive::kPrimInt: {
3942       if (second.IsRegister()) {
3943         CpuRegister second_reg = second.AsRegister<CpuRegister>();
3944         if (op->IsShl()) {
3945           __ shll(first_reg, second_reg);
3946         } else if (op->IsShr()) {
3947           __ sarl(first_reg, second_reg);
3948         } else {
3949           __ shrl(first_reg, second_reg);
3950         }
3951       } else {
3952         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3953         if (op->IsShl()) {
3954           __ shll(first_reg, imm);
3955         } else if (op->IsShr()) {
3956           __ sarl(first_reg, imm);
3957         } else {
3958           __ shrl(first_reg, imm);
3959         }
3960       }
3961       break;
3962     }
3963     case Primitive::kPrimLong: {
3964       if (second.IsRegister()) {
3965         CpuRegister second_reg = second.AsRegister<CpuRegister>();
3966         if (op->IsShl()) {
3967           __ shlq(first_reg, second_reg);
3968         } else if (op->IsShr()) {
3969           __ sarq(first_reg, second_reg);
3970         } else {
3971           __ shrq(first_reg, second_reg);
3972         }
3973       } else {
3974         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3975         if (op->IsShl()) {
3976           __ shlq(first_reg, imm);
3977         } else if (op->IsShr()) {
3978           __ sarq(first_reg, imm);
3979         } else {
3980           __ shrq(first_reg, imm);
3981         }
3982       }
3983       break;
3984     }
3985     default:
3986       LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3987       UNREACHABLE();
3988   }
3989 }
3990 
3991 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
3992   LocationSummary* locations =
3993       new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
3994 
3995   switch (ror->GetResultType()) {
3996     case Primitive::kPrimInt:
3997     case Primitive::kPrimLong: {
3998       locations->SetInAt(0, Location::RequiresRegister());
3999       // The shift count needs to be in CL (unless it is a constant).
4000       locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
4001       locations->SetOut(Location::SameAsFirstInput());
4002       break;
4003     }
4004     default:
4005       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4006       UNREACHABLE();
4007   }
4008 }
4009 
4010 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
4011   LocationSummary* locations = ror->GetLocations();
4012   CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4013   Location second = locations->InAt(1);
4014 
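  // rorl/rorq rotate bits towards the least significant end, which is what HRor models (e.g. a
  // rotate-right such as Integer.rotateRight). Illustrative example: rotating the int value
  // 0x00000001 right by 1 yields 0x80000000.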
4015   switch (ror->GetResultType()) {
4016     case Primitive::kPrimInt:
4017       if (second.IsRegister()) {
4018         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4019         __ rorl(first_reg, second_reg);
4020       } else {
4021         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4022         __ rorl(first_reg, imm);
4023       }
4024       break;
4025     case Primitive::kPrimLong:
4026       if (second.IsRegister()) {
4027         CpuRegister second_reg = second.AsRegister<CpuRegister>();
4028         __ rorq(first_reg, second_reg);
4029       } else {
4030         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4031         __ rorq(first_reg, imm);
4032       }
4033       break;
4034     default:
4035       LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4036       UNREACHABLE();
4037   }
4038 }
4039 
4040 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
4041   HandleShift(shl);
4042 }
4043 
4044 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
4045   HandleShift(shl);
4046 }
4047 
4048 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
4049   HandleShift(shr);
4050 }
4051 
4052 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
4053   HandleShift(shr);
4054 }
4055 
4056 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
4057   HandleShift(ushr);
4058 }
4059 
4060 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
4061   HandleShift(ushr);
4062 }
4063 
4064 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
4065   LocationSummary* locations =
4066       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
4067   InvokeRuntimeCallingConvention calling_convention;
4068   if (instruction->IsStringAlloc()) {
4069     locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
4070   } else {
4071     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4072   }
4073   locations->SetOut(Location::RegisterLocation(RAX));
4074 }
4075 
4076 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
4077   // Note: if heap poisoning is enabled, the entry point takes care
4078   // of poisoning the reference.
4079   if (instruction->IsStringAlloc()) {
4080     // String is allocated through StringFactory. Call NewEmptyString entry point.
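    // In outline: the temp receives the ArtMethod* stored in the pNewEmptyString slot of the
    // current thread's entrypoint table (read via the GS segment), and we then call that method's
    // quick compiled code directly.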
4081     CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
4082     MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize);
4083     __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
4084     __ call(Address(temp, code_offset.SizeValue()));
4085     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
4086   } else {
4087     codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4088     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4089     DCHECK(!codegen_->IsLeafMethod());
4090   }
4091 }
4092 
4093 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
4094   LocationSummary* locations =
4095       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
4096   InvokeRuntimeCallingConvention calling_convention;
4097   locations->SetOut(Location::RegisterLocation(RAX));
4098   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4099   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4100 }
4101 
4102 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
4103   // Note: if heap poisoning is enabled, the entry point takes care
4104   // of poisoning the reference.
4105   QuickEntrypointEnum entrypoint =
4106       CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
4107   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4108   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4109   DCHECK(!codegen_->IsLeafMethod());
4110 }
4111 
4112 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
4113   LocationSummary* locations =
4114       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4115   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
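  // Stack-passed parameters are addressed relative to the SP at the call site, i.e. they live in
  // the caller's frame, so once this method's frame has been pushed their offsets are rebased by
  // the current frame size.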
4116   if (location.IsStackSlot()) {
4117     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4118   } else if (location.IsDoubleStackSlot()) {
4119     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4120   }
4121   locations->SetOut(location);
4122 }
4123 
4124 void InstructionCodeGeneratorX86_64::VisitParameterValue(
4125     HParameterValue* instruction ATTRIBUTE_UNUSED) {
4126   // Nothing to do, the parameter is already at its location.
4127 }
4128 
4129 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
4130   LocationSummary* locations =
4131       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4132   locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4133 }
4134 
4135 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
4136     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4137   // Nothing to do, the method is already at its location.
4138 }
4139 
4140 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4141   LocationSummary* locations =
4142       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4143   locations->SetInAt(0, Location::RequiresRegister());
4144   locations->SetOut(Location::RequiresRegister());
4145 }
4146 
4147 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4148   LocationSummary* locations = instruction->GetLocations();
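  // Two cases: a vtable lookup reads the method pointer straight out of the class's embedded
  // vtable at a fixed offset, while an IMT lookup first loads the ImTable pointer from the class
  // and then indexes into that table.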
4149   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4150     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4151         instruction->GetIndex(), kX86_64PointerSize).SizeValue();
4152     __ movq(locations->Out().AsRegister<CpuRegister>(),
4153             Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
4154   } else {
4155     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4156         instruction->GetIndex(), kX86_64PointerSize));
4157     __ movq(locations->Out().AsRegister<CpuRegister>(),
4158             Address(locations->InAt(0).AsRegister<CpuRegister>(),
4159             mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
4160     __ movq(locations->Out().AsRegister<CpuRegister>(),
4161             Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
4162   }
4163 }
4164 
4165 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
4166   LocationSummary* locations =
4167       new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
4168   locations->SetInAt(0, Location::RequiresRegister());
4169   locations->SetOut(Location::SameAsFirstInput());
4170 }
4171 
4172 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
4173   LocationSummary* locations = not_->GetLocations();
4174   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4175             locations->Out().AsRegister<CpuRegister>().AsRegister());
4176   Location out = locations->Out();
4177   switch (not_->GetResultType()) {
4178     case Primitive::kPrimInt:
4179       __ notl(out.AsRegister<CpuRegister>());
4180       break;
4181 
4182     case Primitive::kPrimLong:
4183       __ notq(out.AsRegister<CpuRegister>());
4184       break;
4185 
4186     default:
4187       LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4188   }
4189 }
4190 
4191 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4192   LocationSummary* locations =
4193       new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall);
4194   locations->SetInAt(0, Location::RequiresRegister());
4195   locations->SetOut(Location::SameAsFirstInput());
4196 }
4197 
4198 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4199   LocationSummary* locations = bool_not->GetLocations();
4200   DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4201             locations->Out().AsRegister<CpuRegister>().AsRegister());
4202   Location out = locations->Out();
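  // Booleans are materialized as 0 or 1, so flipping the low bit with `xor 1` implements logical
  // negation (0 -> 1, 1 -> 0).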
4203   __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4204 }
4205 
4206 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4207   LocationSummary* locations =
4208       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4209   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4210     locations->SetInAt(i, Location::Any());
4211   }
4212   locations->SetOut(Location::Any());
4213 }
4214 
4215 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4216   LOG(FATAL) << "Unimplemented";
4217 }
4218 
4219 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4220   /*
4221    * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need a memory fence.
4222    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
4223    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4224    */
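  // In the switch below only kAnyAny and kNTStoreStore reach MemoryFence(); the other kinds fall
  // through without emitting any instruction. MemoryFence() is expected to emit a full fence
  // (e.g. mfence) or an equivalent locked operation, depending on CPU preferences.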
4225   switch (kind) {
4226     case MemBarrierKind::kAnyAny: {
4227       MemoryFence();
4228       break;
4229     }
4230     case MemBarrierKind::kAnyStore:
4231     case MemBarrierKind::kLoadAny:
4232     case MemBarrierKind::kStoreStore: {
4233       // nop
4234       break;
4235     }
4236     case MemBarrierKind::kNTStoreStore:
4237       // Non-Temporal Store/Store needs an explicit fence.
4238       MemoryFence(/* non-temporal */ true);
4239       break;
4240   }
4241 }
4242 
4243 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
4244   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4245 
4246   bool object_field_get_with_read_barrier =
4247       kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
4248   LocationSummary* locations =
4249       new (GetGraph()->GetArena()) LocationSummary(instruction,
4250                                                    object_field_get_with_read_barrier ?
4251                                                        LocationSummary::kCallOnSlowPath :
4252                                                        LocationSummary::kNoCall);
4253   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4254     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
4255   }
4256   locations->SetInAt(0, Location::RequiresRegister());
4257   if (Primitive::IsFloatingPointType(instruction->GetType())) {
4258     locations->SetOut(Location::RequiresFpuRegister());
4259   } else {
4260     // The output overlaps for an object field get when read barriers
4261     // are enabled: we do not want the move to overwrite the object's
4262     // location, as we need it to emit the read barrier.
4263     locations->SetOut(
4264         Location::RequiresRegister(),
4265         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4266   }
4267 }
4268 
4269 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
4270                                                     const FieldInfo& field_info) {
4271   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4272 
4273   LocationSummary* locations = instruction->GetLocations();
4274   Location base_loc = locations->InAt(0);
4275   CpuRegister base = base_loc.AsRegister<CpuRegister>();
4276   Location out = locations->Out();
4277   bool is_volatile = field_info.IsVolatile();
4278   Primitive::Type field_type = field_info.GetFieldType();
4279   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4280 
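  // Load-width summary for the switch below: boolean -> movzxb, byte -> movsxb, short -> movsxw,
  // char -> movzxw, int -> movl, references -> movl (or the Baker read-barrier helper),
  // long -> movq, float -> movss, double -> movsd.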
4281   switch (field_type) {
4282     case Primitive::kPrimBoolean: {
4283       __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4284       break;
4285     }
4286 
4287     case Primitive::kPrimByte: {
4288       __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4289       break;
4290     }
4291 
4292     case Primitive::kPrimShort: {
4293       __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4294       break;
4295     }
4296 
4297     case Primitive::kPrimChar: {
4298       __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4299       break;
4300     }
4301 
4302     case Primitive::kPrimInt: {
4303       __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4304       break;
4305     }
4306 
4307     case Primitive::kPrimNot: {
4308       // /* HeapReference<Object> */ out = *(base + offset)
4309       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4310         // Note that a potential implicit null check is handled in this
4311         // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
4312         codegen_->GenerateFieldLoadWithBakerReadBarrier(
4313             instruction, out, base, offset, /* needs_null_check */ true);
4314         if (is_volatile) {
4315           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4316         }
4317       } else {
4318         __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4319         codegen_->MaybeRecordImplicitNullCheck(instruction);
4320         if (is_volatile) {
4321           codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4322         }
4323         // If read barriers are enabled, emit read barriers other than
4324         // Baker's using a slow path (and also unpoison the loaded
4325         // reference, if heap poisoning is enabled).
4326         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4327       }
4328       break;
4329     }
4330 
4331     case Primitive::kPrimLong: {
4332       __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
4333       break;
4334     }
4335 
4336     case Primitive::kPrimFloat: {
4337       __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4338       break;
4339     }
4340 
4341     case Primitive::kPrimDouble: {
4342       __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4343       break;
4344     }
4345 
4346     case Primitive::kPrimVoid:
4347       LOG(FATAL) << "Unreachable type " << field_type;
4348       UNREACHABLE();
4349   }
4350 
4351   if (field_type == Primitive::kPrimNot) {
4352     // Potential implicit null checks, in the case of reference
4353     // fields, are handled in the previous switch statement.
4354   } else {
4355     codegen_->MaybeRecordImplicitNullCheck(instruction);
4356   }
4357 
4358   if (is_volatile) {
4359     if (field_type == Primitive::kPrimNot) {
4360       // Memory barriers, in the case of references, are also handled
4361       // in the previous switch statement.
4362     } else {
4363       codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4364     }
4365   }
4366 }
4367 
4368 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
4369                                             const FieldInfo& field_info) {
4370   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4371 
4372   LocationSummary* locations =
4373       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4374   Primitive::Type field_type = field_info.GetFieldType();
4375   bool is_volatile = field_info.IsVolatile();
4376   bool needs_write_barrier =
4377       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
4378 
4379   locations->SetInAt(0, Location::RequiresRegister());
4380   if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
4381     if (is_volatile) {
4382       // In order to satisfy the semantics of volatile, this must be a single instruction store.
4383       locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
4384     } else {
4385       locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
4386     }
4387   } else {
4388     if (is_volatile) {
4389       // In order to satisfy the semantics of volatile, this must be a single instruction store.
4390       locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
4391     } else {
4392       locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4393     }
4394   }
4395   if (needs_write_barrier) {
4396     // Temporary registers for the write barrier.
4397     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
4398     locations->AddTemp(Location::RequiresRegister());
4399   } else if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
4400     // Temporary register for the reference poisoning.
4401     locations->AddTemp(Location::RequiresRegister());
4402   }
4403 }
4404 
4405 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
4406                                                     const FieldInfo& field_info,
4407                                                     bool value_can_be_null) {
4408   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4409 
4410   LocationSummary* locations = instruction->GetLocations();
4411   CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
4412   Location value = locations->InAt(1);
4413   bool is_volatile = field_info.IsVolatile();
4414   Primitive::Type field_type = field_info.GetFieldType();
4415   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4416 
4417   if (is_volatile) {
4418     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4419   }
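  // Volatile stores are bracketed by kAnyStore above and kAnyAny at the end of this method; on
  // x86-64 only the trailing kAnyAny barrier actually emits a fence (see GenerateMemoryBarrier
  // above), the leading one only constrains instruction scheduling.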
4420 
4421   bool maybe_record_implicit_null_check_done = false;
4422 
4423   switch (field_type) {
4424     case Primitive::kPrimBoolean:
4425     case Primitive::kPrimByte: {
4426       if (value.IsConstant()) {
4427         int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4428         __ movb(Address(base, offset), Immediate(v));
4429       } else {
4430         __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
4431       }
4432       break;
4433     }
4434 
4435     case Primitive::kPrimShort:
4436     case Primitive::kPrimChar: {
4437       if (value.IsConstant()) {
4438         int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4439         __ movw(Address(base, offset), Immediate(v));
4440       } else {
4441         __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
4442       }
4443       break;
4444     }
4445 
4446     case Primitive::kPrimInt:
4447     case Primitive::kPrimNot: {
4448       if (value.IsConstant()) {
4449         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4450         // `field_type == Primitive::kPrimNot` implies `v == 0`.
4451         DCHECK((field_type != Primitive::kPrimNot) || (v == 0));
4452         // Note: if heap poisoning is enabled, no need to poison
4453         // (negate) `v` if it is a reference, as it would be null.
4454         __ movl(Address(base, offset), Immediate(v));
4455       } else {
4456         if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
4457           CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4458           __ movl(temp, value.AsRegister<CpuRegister>());
4459           __ PoisonHeapReference(temp);
4460           __ movl(Address(base, offset), temp);
4461         } else {
4462           __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
4463         }
4464       }
4465       break;
4466     }
4467 
4468     case Primitive::kPrimLong: {
4469       if (value.IsConstant()) {
4470         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4471         codegen_->MoveInt64ToAddress(Address(base, offset),
4472                                      Address(base, offset + sizeof(int32_t)),
4473                                      v,
4474                                      instruction);
4475         maybe_record_implicit_null_check_done = true;
4476       } else {
4477         __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
4478       }
4479       break;
4480     }
4481 
4482     case Primitive::kPrimFloat: {
4483       if (value.IsConstant()) {
4484         int32_t v =
4485             bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4486         __ movl(Address(base, offset), Immediate(v));
4487       } else {
4488         __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4489       }
4490       break;
4491     }
4492 
4493     case Primitive::kPrimDouble: {
4494       if (value.IsConstant()) {
4495         int64_t v =
4496             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4497         codegen_->MoveInt64ToAddress(Address(base, offset),
4498                                      Address(base, offset + sizeof(int32_t)),
4499                                      v,
4500                                      instruction);
4501         maybe_record_implicit_null_check_done = true;
4502       } else {
4503         __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4504       }
4505       break;
4506     }
4507 
4508     case Primitive::kPrimVoid:
4509       LOG(FATAL) << "Unreachable type " << field_type;
4510       UNREACHABLE();
4511   }
4512 
4513   if (!maybe_record_implicit_null_check_done) {
4514     codegen_->MaybeRecordImplicitNullCheck(instruction);
4515   }
4516 
4517   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
4518     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4519     CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4520     codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
4521   }
4522 
4523   if (is_volatile) {
4524     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4525   }
4526 }
4527 
4528 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4529   HandleFieldSet(instruction, instruction->GetFieldInfo());
4530 }
4531 
4532 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4533   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4534 }
4535 
4536 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4537   HandleFieldGet(instruction);
4538 }
4539 
4540 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4541   HandleFieldGet(instruction, instruction->GetFieldInfo());
4542 }
4543 
4544 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4545   HandleFieldGet(instruction);
4546 }
4547 
4548 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4549   HandleFieldGet(instruction, instruction->GetFieldInfo());
4550 }
4551 
4552 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4553   HandleFieldSet(instruction, instruction->GetFieldInfo());
4554 }
4555 
4556 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4557   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4558 }
4559 
4560 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
4561     HUnresolvedInstanceFieldGet* instruction) {
4562   FieldAccessCallingConventionX86_64 calling_convention;
4563   codegen_->CreateUnresolvedFieldLocationSummary(
4564       instruction, instruction->GetFieldType(), calling_convention);
4565 }
4566 
4567 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
4568     HUnresolvedInstanceFieldGet* instruction) {
4569   FieldAccessCallingConventionX86_64 calling_convention;
4570   codegen_->GenerateUnresolvedFieldAccess(instruction,
4571                                           instruction->GetFieldType(),
4572                                           instruction->GetFieldIndex(),
4573                                           instruction->GetDexPc(),
4574                                           calling_convention);
4575 }
4576 
4577 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
4578     HUnresolvedInstanceFieldSet* instruction) {
4579   FieldAccessCallingConventionX86_64 calling_convention;
4580   codegen_->CreateUnresolvedFieldLocationSummary(
4581       instruction, instruction->GetFieldType(), calling_convention);
4582 }
4583 
4584 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
4585     HUnresolvedInstanceFieldSet* instruction) {
4586   FieldAccessCallingConventionX86_64 calling_convention;
4587   codegen_->GenerateUnresolvedFieldAccess(instruction,
4588                                           instruction->GetFieldType(),
4589                                           instruction->GetFieldIndex(),
4590                                           instruction->GetDexPc(),
4591                                           calling_convention);
4592 }
4593 
4594 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
4595     HUnresolvedStaticFieldGet* instruction) {
4596   FieldAccessCallingConventionX86_64 calling_convention;
4597   codegen_->CreateUnresolvedFieldLocationSummary(
4598       instruction, instruction->GetFieldType(), calling_convention);
4599 }
4600 
4601 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
4602     HUnresolvedStaticFieldGet* instruction) {
4603   FieldAccessCallingConventionX86_64 calling_convention;
4604   codegen_->GenerateUnresolvedFieldAccess(instruction,
4605                                           instruction->GetFieldType(),
4606                                           instruction->GetFieldIndex(),
4607                                           instruction->GetDexPc(),
4608                                           calling_convention);
4609 }
4610 
4611 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
4612     HUnresolvedStaticFieldSet* instruction) {
4613   FieldAccessCallingConventionX86_64 calling_convention;
4614   codegen_->CreateUnresolvedFieldLocationSummary(
4615       instruction, instruction->GetFieldType(), calling_convention);
4616 }
4617 
4618 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
4619     HUnresolvedStaticFieldSet* instruction) {
4620   FieldAccessCallingConventionX86_64 calling_convention;
4621   codegen_->GenerateUnresolvedFieldAccess(instruction,
4622                                           instruction->GetFieldType(),
4623                                           instruction->GetFieldIndex(),
4624                                           instruction->GetDexPc(),
4625                                           calling_convention);
4626 }
4627 
4628 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
4629   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4630   Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
4631       ? Location::RequiresRegister()
4632       : Location::Any();
4633   locations->SetInAt(0, loc);
4634 }
4635 
4636 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
4637   if (CanMoveNullCheckToUser(instruction)) {
4638     return;
4639   }
4640   LocationSummary* locations = instruction->GetLocations();
4641   Location obj = locations->InAt(0);
4642 
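  // A load from (obj + 0) faults if obj is null; the PC recorded below lets the runtime's fault
  // handler turn the resulting SIGSEGV into a NullPointerException. The RAX operand's value does
  // not matter: testl only reads it and sets flags, so the check has no register side effects.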
4643   __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
4644   RecordPcInfo(instruction, instruction->GetDexPc());
4645 }
4646 
4647 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
4648   SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction);
4649   AddSlowPath(slow_path);
4650 
4651   LocationSummary* locations = instruction->GetLocations();
4652   Location obj = locations->InAt(0);
4653 
4654   if (obj.IsRegister()) {
4655     __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
4656   } else if (obj.IsStackSlot()) {
4657     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
4658   } else {
4659     DCHECK(obj.IsConstant()) << obj;
4660     DCHECK(obj.GetConstant()->IsNullConstant());
4661     __ jmp(slow_path->GetEntryLabel());
4662     return;
4663   }
4664   __ j(kEqual, slow_path->GetEntryLabel());
4665 }
4666 
4667 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
4668   codegen_->GenerateNullCheck(instruction);
4669 }
4670 
4671 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
4672   bool object_array_get_with_read_barrier =
4673       kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
4674   LocationSummary* locations =
4675       new (GetGraph()->GetArena()) LocationSummary(instruction,
4676                                                    object_array_get_with_read_barrier ?
4677                                                        LocationSummary::kCallOnSlowPath :
4678                                                        LocationSummary::kNoCall);
4679   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
4680     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
4681   }
4682   locations->SetInAt(0, Location::RequiresRegister());
4683   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4684   if (Primitive::IsFloatingPointType(instruction->GetType())) {
4685     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4686   } else {
4687     // The output overlaps for an object array get when read barriers
4688     // are enabled: we do not want the move to overwrite the array's
4689     // location, as we need it to emit the read barrier.
4690     locations->SetOut(
4691         Location::RequiresRegister(),
4692         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4693   }
4694 }
4695 
4696 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
4697   LocationSummary* locations = instruction->GetLocations();
4698   Location obj_loc = locations->InAt(0);
4699   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
4700   Location index = locations->InAt(1);
4701   Location out_loc = locations->Out();
4702   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
4703 
4704   Primitive::Type type = instruction->GetType();
4705   switch (type) {
4706     case Primitive::kPrimBoolean: {
4707       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4708       __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
4709       break;
4710     }
4711 
4712     case Primitive::kPrimByte: {
4713       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4714       __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
4715       break;
4716     }
4717 
4718     case Primitive::kPrimShort: {
4719       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4720       __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
4721       break;
4722     }
4723 
4724     case Primitive::kPrimChar: {
4725       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4726       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
4727         // Branch between the compressed and uncompressed cases depending on the string's compression flag.
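        // Illustrative effect: for a compressed (Latin-1) string the character at `index` is
        // loaded with a byte load from data_offset + index, while an uncompressed string uses a
        // 16-bit load from data_offset + 2 * index.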
4728         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
4729         NearLabel done, not_compressed;
4730         __ testb(Address(obj, count_offset), Immediate(1));
4731         codegen_->MaybeRecordImplicitNullCheck(instruction);
4732         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
4733                       "Expecting 0=compressed, 1=uncompressed");
4734         __ j(kNotZero, &not_compressed);
4735         __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
4736         __ jmp(&done);
4737         __ Bind(&not_compressed);
4738         __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
4739         __ Bind(&done);
4740       } else {
4741         __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
4742       }
4743       break;
4744     }
4745 
4746     case Primitive::kPrimInt: {
4747       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4748       __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
4749       break;
4750     }
4751 
4752     case Primitive::kPrimNot: {
4753       static_assert(
4754           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
4755           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
4756       // /* HeapReference<Object> */ out =
4757       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
4758       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4759         // Note that a potential implicit null check is handled in this
4760         // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
4761         codegen_->GenerateArrayLoadWithBakerReadBarrier(
4762             instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
4763       } else {
4764         CpuRegister out = out_loc.AsRegister<CpuRegister>();
4765         __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
4766         codegen_->MaybeRecordImplicitNullCheck(instruction);
4767         // If read barriers are enabled, emit read barriers other than
4768         // Baker's using a slow path (and also unpoison the loaded
4769         // reference, if heap poisoning is enabled).
4770         if (index.IsConstant()) {
4771           uint32_t offset =
4772               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
4773           codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
4774         } else {
4775           codegen_->MaybeGenerateReadBarrierSlow(
4776               instruction, out_loc, out_loc, obj_loc, data_offset, index);
4777         }
4778       }
4779       break;
4780     }
4781 
4782     case Primitive::kPrimLong: {
4783       CpuRegister out = out_loc.AsRegister<CpuRegister>();
4784       __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
4785       break;
4786     }
4787 
4788     case Primitive::kPrimFloat: {
4789       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4790       __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
4791       break;
4792     }
4793 
4794     case Primitive::kPrimDouble: {
4795       XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4796       __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
4797       break;
4798     }
4799 
4800     case Primitive::kPrimVoid:
4801       LOG(FATAL) << "Unreachable type " << type;
4802       UNREACHABLE();
4803   }
4804 
4805   if (type == Primitive::kPrimNot) {
4806     // Potential implicit null checks, in the case of reference
4807     // arrays, are handled in the previous switch statement.
4808   } else {
4809     codegen_->MaybeRecordImplicitNullCheck(instruction);
4810   }
4811 }
4812 
4813 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
4814   Primitive::Type value_type = instruction->GetComponentType();
4815 
4816   bool needs_write_barrier =
4817       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4818   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4819 
4820   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
4821       instruction,
4822       may_need_runtime_call_for_type_check ?
4823           LocationSummary::kCallOnSlowPath :
4824           LocationSummary::kNoCall);
4825 
4826   locations->SetInAt(0, Location::RequiresRegister());
4827   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4828   if (Primitive::IsFloatingPointType(value_type)) {
4829     locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
4830   } else {
4831     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
4832   }
4833 
4834   if (needs_write_barrier) {
4835     // Temporary registers for the write barrier.
4836     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
4837     locations->AddTemp(Location::RequiresRegister());
4838   }
4839 }
4840 
4841 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
4842   LocationSummary* locations = instruction->GetLocations();
4843   Location array_loc = locations->InAt(0);
4844   CpuRegister array = array_loc.AsRegister<CpuRegister>();
4845   Location index = locations->InAt(1);
4846   Location value = locations->InAt(2);
4847   Primitive::Type value_type = instruction->GetComponentType();
4848   bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4849   bool needs_write_barrier =
4850       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4851   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4852   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4853   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4854 
4855   switch (value_type) {
4856     case Primitive::kPrimBoolean:
4857     case Primitive::kPrimByte: {
4858       uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
4859       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
4860       if (value.IsRegister()) {
4861         __ movb(address, value.AsRegister<CpuRegister>());
4862       } else {
4863         __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
4864       }
4865       codegen_->MaybeRecordImplicitNullCheck(instruction);
4866       break;
4867     }
4868 
4869     case Primitive::kPrimShort:
4870     case Primitive::kPrimChar: {
4871       uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
4872       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
4873       if (value.IsRegister()) {
4874         __ movw(address, value.AsRegister<CpuRegister>());
4875       } else {
4876         DCHECK(value.IsConstant()) << value;
4877         __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
4878       }
4879       codegen_->MaybeRecordImplicitNullCheck(instruction);
4880       break;
4881     }
4882 
4883     case Primitive::kPrimNot: {
4884       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4885       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
4886 
4887       if (!value.IsRegister()) {
4888         // Just setting null.
4889         DCHECK(instruction->InputAt(2)->IsNullConstant());
4890         DCHECK(value.IsConstant()) << value;
4891         __ movl(address, Immediate(0));
4892         codegen_->MaybeRecordImplicitNullCheck(instruction);
4893         DCHECK(!needs_write_barrier);
4894         DCHECK(!may_need_runtime_call_for_type_check);
4895         break;
4896       }
4897 
4898       DCHECK(needs_write_barrier);
4899       CpuRegister register_value = value.AsRegister<CpuRegister>();
4900       // We cannot use a NearLabel for `done`, as its range may be too
4901       // short when Baker read barriers are enabled.
4902       Label done;
4903       NearLabel not_null, do_put;
4904       SlowPathCode* slow_path = nullptr;
4905       Location temp_loc = locations->GetTemp(0);
4906       CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
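      // In outline, the fast path below is: store null directly when the value is null; otherwise
      // compare the array's component type with the value's class and store on a match; when the
      // array is statically an Object[], additionally accept the case where the component type's
      // superclass is null (i.e. the component type is Object); anything else branches to
      // ArraySetSlowPathX86_64.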
4907       if (may_need_runtime_call_for_type_check) {
4908         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
4909         codegen_->AddSlowPath(slow_path);
4910         if (instruction->GetValueCanBeNull()) {
4911           __ testl(register_value, register_value);
4912           __ j(kNotEqual, &not_null);
4913           __ movl(address, Immediate(0));
4914           codegen_->MaybeRecordImplicitNullCheck(instruction);
4915           __ jmp(&done);
4916           __ Bind(&not_null);
4917         }
4918 
4919         // Note that when Baker read barriers are enabled, the type
4920         // checks are performed without read barriers.  This is fine,
4921         // even in the case where a class object is in the from-space
4922         // after the flip, as a comparison involving such a type would
4923         // not produce a false positive; it may of course produce a
4924         // false negative, in which case we would take the ArraySet
4925         // slow path.
4926 
4927         // /* HeapReference<Class> */ temp = array->klass_
4928         __ movl(temp, Address(array, class_offset));
4929         codegen_->MaybeRecordImplicitNullCheck(instruction);
4930         __ MaybeUnpoisonHeapReference(temp);
4931 
4932         // /* HeapReference<Class> */ temp = temp->component_type_
4933         __ movl(temp, Address(temp, component_offset));
4934         // If heap poisoning is enabled, no need to unpoison `temp`
4935         // nor the object reference in `register_value->klass`, as
4936         // we are comparing two poisoned references.
4937         __ cmpl(temp, Address(register_value, class_offset));
4938 
4939         if (instruction->StaticTypeOfArrayIsObjectArray()) {
4940           __ j(kEqual, &do_put);
4941           // If heap poisoning is enabled, the `temp` reference has
4942           // not been unpoisoned yet; unpoison it now.
4943           __ MaybeUnpoisonHeapReference(temp);
4944 
4945           // If heap poisoning is enabled, no need to unpoison the
4946           // heap reference loaded below, as it is only used for a
4947           // comparison with null.
4948           __ cmpl(Address(temp, super_offset), Immediate(0));
4949           __ j(kNotEqual, slow_path->GetEntryLabel());
4950           __ Bind(&do_put);
4951         } else {
4952           __ j(kNotEqual, slow_path->GetEntryLabel());
4953         }
4954       }
4955 
4956       if (kPoisonHeapReferences) {
4957         __ movl(temp, register_value);
4958         __ PoisonHeapReference(temp);
4959         __ movl(address, temp);
4960       } else {
4961         __ movl(address, register_value);
4962       }
4963       if (!may_need_runtime_call_for_type_check) {
4964         codegen_->MaybeRecordImplicitNullCheck(instruction);
4965       }
4966 
4967       CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4968       codegen_->MarkGCCard(
4969           temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
4970       __ Bind(&done);
4971 
4972       if (slow_path != nullptr) {
4973         __ Bind(slow_path->GetExitLabel());
4974       }
4975 
4976       break;
4977     }
4978 
4979     case Primitive::kPrimInt: {
4980       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4981       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
4982       if (value.IsRegister()) {
4983         __ movl(address, value.AsRegister<CpuRegister>());
4984       } else {
4985         DCHECK(value.IsConstant()) << value;
4986         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4987         __ movl(address, Immediate(v));
4988       }
4989       codegen_->MaybeRecordImplicitNullCheck(instruction);
4990       break;
4991     }
4992 
4993     case Primitive::kPrimLong: {
4994       uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
4995       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
4996       if (value.IsRegister()) {
4997         __ movq(address, value.AsRegister<CpuRegister>());
4998         codegen_->MaybeRecordImplicitNullCheck(instruction);
4999       } else {
5000         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
5001         Address address_high =
5002             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5003         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5004       }
5005       break;
5006     }
5007 
5008     case Primitive::kPrimFloat: {
5009       uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
5010       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5011       if (value.IsFpuRegister()) {
5012         __ movss(address, value.AsFpuRegister<XmmRegister>());
5013       } else {
5014         DCHECK(value.IsConstant());
5015         int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5016         __ movl(address, Immediate(v));
5017       }
5018       codegen_->MaybeRecordImplicitNullCheck(instruction);
5019       break;
5020     }
5021 
5022     case Primitive::kPrimDouble: {
5023       uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
5024       Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5025       if (value.IsFpuRegister()) {
5026         __ movsd(address, value.AsFpuRegister<XmmRegister>());
5027         codegen_->MaybeRecordImplicitNullCheck(instruction);
5028       } else {
5029         int64_t v =
5030             bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5031         Address address_high =
5032             CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5033         codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5034       }
5035       break;
5036     }
5037 
5038     case Primitive::kPrimVoid:
5039       LOG(FATAL) << "Unreachable type " << instruction->GetType();
5040       UNREACHABLE();
5041   }
5042 }
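// Note on the 64-bit constant stores above: x86-64 has no instruction that stores a full 64-bit
// immediate to memory, so MoveInt64ToAddress() is expected to either emit a single movq with a
// sign-extended 32-bit immediate when the value fits, or split the constant into two 32-bit
// stores (`address` for the low half, `address_high` for the high half), recording the implicit
// null check on the first instruction that may fault.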
5043 
5044 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
5045   LocationSummary* locations =
5046       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
5047   locations->SetInAt(0, Location::RequiresRegister());
5048   if (!instruction->IsEmittedAtUseSite()) {
5049     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5050   }
5051 }
5052 
5053 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
5054   if (instruction->IsEmittedAtUseSite()) {
5055     return;
5056   }
5057 
5058   LocationSummary* locations = instruction->GetLocations();
5059   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
5060   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
5061   CpuRegister out = locations->Out().AsRegister<CpuRegister>();
5062   __ movl(out, Address(obj, offset));
5063   codegen_->MaybeRecordImplicitNullCheck(instruction);
5064   // Mask out the compression flag (low bit) from String's array length.
5065   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
5066     __ shrl(out, Immediate(1));
5067   }
5068 }
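// Note: with string compression enabled, the low bit of the String count field is the
// compression flag and the upper bits hold the character count, so the logical right shift by
// one above recovers the length for HArrayLength nodes that are really String lengths.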
5069 
5070 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5071   RegisterSet caller_saves = RegisterSet::Empty();
5072   InvokeRuntimeCallingConvention calling_convention;
5073   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5074   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5075   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
5076   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5077   HInstruction* length = instruction->InputAt(1);
5078   if (!length->IsEmittedAtUseSite()) {
5079     locations->SetInAt(1, Location::RegisterOrConstant(length));
5080   }
5081 }
5082 
5083 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5084   LocationSummary* locations = instruction->GetLocations();
5085   Location index_loc = locations->InAt(0);
5086   Location length_loc = locations->InAt(1);
5087   SlowPathCode* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
5088 
5089   if (length_loc.IsConstant()) {
5090     int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5091     if (index_loc.IsConstant()) {
5092       // BCE will remove the bounds check if we are guaranteed to pass.
5093       int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5094       if (index < 0 || index >= length) {
5095         codegen_->AddSlowPath(slow_path);
5096         __ jmp(slow_path->GetEntryLabel());
5097       } else {
5098         // Some optimization after BCE may have generated this, and we should not
5099         // generate a bounds check if it is a valid range.
5100       }
5101       return;
5102     }
5103 
5104     // We have to reverse the jump condition because the length is the constant.
5105     CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5106     __ cmpl(index_reg, Immediate(length));
5107     codegen_->AddSlowPath(slow_path);
5108     __ j(kAboveEqual, slow_path->GetEntryLabel());
5109   } else {
5110     HInstruction* array_length = instruction->InputAt(1);
5111     if (array_length->IsEmittedAtUseSite()) {
5112       // Address the length field in the array.
5113       DCHECK(array_length->IsArrayLength());
5114       uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
5115       Location array_loc = array_length->GetLocations()->InAt(0);
5116       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
5117       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5118         // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
5119         // the string compression flag) with the in-memory length and avoid the temporary.
5120         CpuRegister length_reg = CpuRegister(TMP);
5121         __ movl(length_reg, array_len);
5122         codegen_->MaybeRecordImplicitNullCheck(array_length);
5123         __ shrl(length_reg, Immediate(1));
5124         codegen_->GenerateIntCompare(length_reg, index_loc);
5125       } else {
5126         // Checking the bound for the general case:
5127         // an array of char, or a String's char array when the compression feature is off.
5128         if (index_loc.IsConstant()) {
5129           int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5130           __ cmpl(array_len, Immediate(value));
5131         } else {
5132           __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
5133         }
5134         codegen_->MaybeRecordImplicitNullCheck(array_length);
5135       }
5136     } else {
5137       codegen_->GenerateIntCompare(length_loc, index_loc);
5138     }
5139     codegen_->AddSlowPath(slow_path);
5140     __ j(kBelowEqual, slow_path->GetEntryLabel());
5141   }
5142 }
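// Note on the bounds check above: since a valid length is non-negative, a single unsigned
// comparison covers both failure modes. With cmpl(index, length) the slow path is taken on
// kAboveEqual (index >= length, or a negative index reinterpreted as a large unsigned value);
// with the operands reversed, as in the GenerateIntCompare(length, index) path, the condition
// becomes kBelowEqual.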
5143 
5144 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5145                                      CpuRegister card,
5146                                      CpuRegister object,
5147                                      CpuRegister value,
5148                                      bool value_can_be_null) {
5149   NearLabel is_null;
5150   if (value_can_be_null) {
5151     __ testl(value, value);
5152     __ j(kEqual, &is_null);
5153   }
5154   __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
5155                                         /* no_rip */ true));
5156   __ movq(temp, object);
5157   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
5158   __ movb(Address(temp, card, TIMES_1, 0), card);
5159   if (value_can_be_null) {
5160     __ Bind(&is_null);
5161   }
5162 }
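// Note on MarkGCCard: `card` holds the card table base read from thread-local storage and
// `temp` holds object >> kCardShift, so the movb dirties card_table[object >> kCardShift].
// The byte written is the low byte of the card table base itself, which (as in the other
// backends) is assumed to be biased so that this value equals the dirty-card marker.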
5163 
5164 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5165   LOG(FATAL) << "Unimplemented";
5166 }
5167 
5168 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5169   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5170 }
5171 
5172 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5173   LocationSummary* locations =
5174       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
5175   // In suspend check slow path, usually there are no caller-save registers at all.
5176   // If SIMD instructions are present, however, we force spilling all live SIMD
5177   // registers in full width (since the runtime only saves/restores lower part).
5178   locations->SetCustomSlowPathCallerSaves(
5179       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5180 }
5181 
5182 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5183   HBasicBlock* block = instruction->GetBlock();
5184   if (block->GetLoopInformation() != nullptr) {
5185     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5186     // The back edge will generate the suspend check.
5187     return;
5188   }
5189   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5190     // The goto will generate the suspend check.
5191     return;
5192   }
5193   GenerateSuspendCheck(instruction, nullptr);
5194 }
5195 
5196 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
5197                                                           HBasicBlock* successor) {
5198   SuspendCheckSlowPathX86_64* slow_path =
5199       down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
5200   if (slow_path == nullptr) {
5201     slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
5202     instruction->SetSlowPath(slow_path);
5203     codegen_->AddSlowPath(slow_path);
5204     if (successor != nullptr) {
5205       DCHECK(successor->IsLoopHeader());
5206       codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
5207     }
5208   } else {
5209     DCHECK_EQ(slow_path->GetSuccessor(), successor);
5210   }
5211 
5212   __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
5213                                   /* no_rip */ true),
5214                 Immediate(0));
5215   if (successor == nullptr) {
5216     __ j(kNotEqual, slow_path->GetEntryLabel());
5217     __ Bind(slow_path->GetReturnLabel());
5218   } else {
5219     __ j(kEqual, codegen_->GetLabelOf(successor));
5220     __ jmp(slow_path->GetEntryLabel());
5221   }
5222 }
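// Note on GenerateSuspendCheck: the cmpw tests the thread flags word read via the GS segment;
// a non-zero value means a suspend or checkpoint request is pending. Without a successor the
// slow path is entered on kNotEqual and execution resumes right after the check; on a back edge
// (successor != nullptr) the polarity is flipped so the common case jumps straight to the loop
// header and the slow path falls through to the jmp.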
5223 
5224 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
5225   return codegen_->GetAssembler();
5226 }
5227 
5228 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
5229   MoveOperands* move = moves_[index];
5230   Location source = move->GetSource();
5231   Location destination = move->GetDestination();
5232 
5233   if (source.IsRegister()) {
5234     if (destination.IsRegister()) {
5235       __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
5236     } else if (destination.IsStackSlot()) {
5237       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
5238               source.AsRegister<CpuRegister>());
5239     } else {
5240       DCHECK(destination.IsDoubleStackSlot());
5241       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
5242               source.AsRegister<CpuRegister>());
5243     }
5244   } else if (source.IsStackSlot()) {
5245     if (destination.IsRegister()) {
5246       __ movl(destination.AsRegister<CpuRegister>(),
5247               Address(CpuRegister(RSP), source.GetStackIndex()));
5248     } else if (destination.IsFpuRegister()) {
5249       __ movss(destination.AsFpuRegister<XmmRegister>(),
5250               Address(CpuRegister(RSP), source.GetStackIndex()));
5251     } else {
5252       DCHECK(destination.IsStackSlot());
5253       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5254       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5255     }
5256   } else if (source.IsDoubleStackSlot()) {
5257     if (destination.IsRegister()) {
5258       __ movq(destination.AsRegister<CpuRegister>(),
5259               Address(CpuRegister(RSP), source.GetStackIndex()));
5260     } else if (destination.IsFpuRegister()) {
5261       __ movsd(destination.AsFpuRegister<XmmRegister>(),
5262                Address(CpuRegister(RSP), source.GetStackIndex()));
5263     } else {
5264       DCHECK(destination.IsDoubleStackSlot()) << destination;
5265       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5266       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5267     }
5268   } else if (source.IsSIMDStackSlot()) {
5269     DCHECK(destination.IsFpuRegister());
5270     __ movups(destination.AsFpuRegister<XmmRegister>(),
5271               Address(CpuRegister(RSP), source.GetStackIndex()));
5272   } else if (source.IsConstant()) {
5273     HConstant* constant = source.GetConstant();
5274     if (constant->IsIntConstant() || constant->IsNullConstant()) {
5275       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5276       if (destination.IsRegister()) {
5277         if (value == 0) {
5278           __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5279         } else {
5280           __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
5281         }
5282       } else {
5283         DCHECK(destination.IsStackSlot()) << destination;
5284         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
5285       }
5286     } else if (constant->IsLongConstant()) {
5287       int64_t value = constant->AsLongConstant()->GetValue();
5288       if (destination.IsRegister()) {
5289         codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
5290       } else {
5291         DCHECK(destination.IsDoubleStackSlot()) << destination;
5292         codegen_->Store64BitValueToStack(destination, value);
5293       }
5294     } else if (constant->IsFloatConstant()) {
5295       float fp_value = constant->AsFloatConstant()->GetValue();
5296       if (destination.IsFpuRegister()) {
5297         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5298         codegen_->Load32BitValue(dest, fp_value);
5299       } else {
5300         DCHECK(destination.IsStackSlot()) << destination;
5301         Immediate imm(bit_cast<int32_t, float>(fp_value));
5302         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
5303       }
5304     } else {
5305       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
5306       double fp_value = constant->AsDoubleConstant()->GetValue();
5307       int64_t value = bit_cast<int64_t, double>(fp_value);
5308       if (destination.IsFpuRegister()) {
5309         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5310         codegen_->Load64BitValue(dest, fp_value);
5311       } else {
5312         DCHECK(destination.IsDoubleStackSlot()) << destination;
5313         codegen_->Store64BitValueToStack(destination, value);
5314       }
5315     }
5316   } else if (source.IsFpuRegister()) {
5317     if (destination.IsFpuRegister()) {
5318       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5319     } else if (destination.IsStackSlot()) {
5320       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
5321                source.AsFpuRegister<XmmRegister>());
5322     } else if (destination.IsDoubleStackSlot()) {
5323       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
5324                source.AsFpuRegister<XmmRegister>());
5325     } else {
5326       DCHECK(destination.IsSIMDStackSlot());
5327       __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
5328                 source.AsFpuRegister<XmmRegister>());
5329     }
5330   }
5331 }
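// Note on EmitMove: stack-to-stack moves go through the reserved TMP register because x86-64
// has no memory-to-memory mov; 32-bit and 64-bit slots select movl/movss versus movq/movsd
// accordingly, and constants are materialized directly into the destination when possible.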
5332 
5333 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
5334   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5335   __ movl(Address(CpuRegister(RSP), mem), reg);
5336   __ movl(reg, CpuRegister(TMP));
5337 }
5338 
5339 void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) {
5340   ScratchRegisterScope ensure_scratch(
5341       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5342 
5343   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5344   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5345   __ movl(CpuRegister(ensure_scratch.GetRegister()),
5346           Address(CpuRegister(RSP), mem2 + stack_offset));
5347   __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5348   __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
5349           CpuRegister(ensure_scratch.GetRegister()));
5350 }
5351 
5352 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
5353   __ movq(CpuRegister(TMP), reg1);
5354   __ movq(reg1, reg2);
5355   __ movq(reg2, CpuRegister(TMP));
5356 }
5357 
5358 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
5359   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5360   __ movq(Address(CpuRegister(RSP), mem), reg);
5361   __ movq(reg, CpuRegister(TMP));
5362 }
5363 
5364 void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
5365   ScratchRegisterScope ensure_scratch(
5366       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5367 
5368   int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5369   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5370   __ movq(CpuRegister(ensure_scratch.GetRegister()),
5371           Address(CpuRegister(RSP), mem2 + stack_offset));
5372   __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5373   __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
5374           CpuRegister(ensure_scratch.GetRegister()));
5375 }
5376 
5377 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
5378   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5379   __ movss(Address(CpuRegister(RSP), mem), reg);
5380   __ movd(reg, CpuRegister(TMP));
5381 }
5382 
5383 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
5384   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5385   __ movsd(Address(CpuRegister(RSP), mem), reg);
5386   __ movd(reg, CpuRegister(TMP));
5387 }
5388 
5389 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
5390   MoveOperands* move = moves_[index];
5391   Location source = move->GetSource();
5392   Location destination = move->GetDestination();
5393 
5394   if (source.IsRegister() && destination.IsRegister()) {
5395     Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5396   } else if (source.IsRegister() && destination.IsStackSlot()) {
5397     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5398   } else if (source.IsStackSlot() && destination.IsRegister()) {
5399     Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5400   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
5401     Exchange32(destination.GetStackIndex(), source.GetStackIndex());
5402   } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
5403     Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5404   } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
5405     Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5406   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
5407     Exchange64(destination.GetStackIndex(), source.GetStackIndex());
5408   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
5409     __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
5410     __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
5411     __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
5412   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
5413     Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5414   } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
5415     Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5416   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
5417     Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5418   } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
5419     Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5420   } else {
5421     LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
5422   }
5423 }
5424 
5425 
5426 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
5427   __ pushq(CpuRegister(reg));
5428 }
5429 
5430 
5431 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
5432   __ popq(CpuRegister(reg));
5433 }
5434 
5435 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
5436     SlowPathCode* slow_path, CpuRegister class_reg) {
5437   __ cmpl(Address(class_reg,  mirror::Class::StatusOffset().Int32Value()),
5438           Immediate(mirror::Class::kStatusInitialized));
5439   __ j(kLess, slow_path->GetEntryLabel());
5440   __ Bind(slow_path->GetExitLabel());
5441   // No need for memory fence, thanks to the x86-64 memory model.
5442 }
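// Note on the initialization check above: class states below kStatusInitialized branch to the
// slow path, which calls the runtime to initialize the class. No fence is needed afterwards
// because x86-64 loads already have acquire semantics, so fields written by <clinit> are visible
// once the status word reads as initialized.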
5443 
5444 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
5445     HLoadClass::LoadKind desired_class_load_kind) {
5446   switch (desired_class_load_kind) {
5447     case HLoadClass::LoadKind::kInvalid:
5448       LOG(FATAL) << "UNREACHABLE";
5449       UNREACHABLE();
5450     case HLoadClass::LoadKind::kReferrersClass:
5451       break;
5452     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
5453       DCHECK(!GetCompilerOptions().GetCompilePic());
5454       // We prefer the always-available RIP-relative address for the x86-64 boot image.
5455       return HLoadClass::LoadKind::kBootImageLinkTimePcRelative;
5456     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5457       DCHECK(GetCompilerOptions().GetCompilePic());
5458       break;
5459     case HLoadClass::LoadKind::kBootImageAddress:
5460       break;
5461     case HLoadClass::LoadKind::kBssEntry:
5462       DCHECK(!Runtime::Current()->UseJitCompilation());
5463       break;
5464     case HLoadClass::LoadKind::kJitTableAddress:
5465       DCHECK(Runtime::Current()->UseJitCompilation());
5466       break;
5467     case HLoadClass::LoadKind::kDexCacheViaMethod:
5468       break;
5469   }
5470   return desired_class_load_kind;
5471 }
5472 
5473 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
5474   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5475   if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
5476     // Custom calling convention: RAX serves as both input and output.
5477     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
5478         cls,
5479         Location::RegisterLocation(RAX),
5480         Location::RegisterLocation(RAX));
5481     return;
5482   }
5483   DCHECK(!cls->NeedsAccessCheck());
5484 
5485   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
5486   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
5487       ? LocationSummary::kCallOnSlowPath
5488       : LocationSummary::kNoCall;
5489   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
5490   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
5491     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5492   }
5493 
5494   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
5495     locations->SetInAt(0, Location::RequiresRegister());
5496   }
5497   locations->SetOut(Location::RequiresRegister());
5498   if (load_kind == HLoadClass::LoadKind::kBssEntry) {
5499     if (!kUseReadBarrier || kUseBakerReadBarrier) {
5500       // Rely on the type resolution and/or initialization to save everything.
5501       // Custom calling convention: RAX serves as both input and output.
5502       RegisterSet caller_saves = RegisterSet::Empty();
5503       caller_saves.Add(Location::RegisterLocation(RAX));
5504       locations->SetCustomSlowPathCallerSaves(caller_saves);
5505     } else {
5506       // For non-Baker read barrier we have a temp-clobbering call.
5507     }
5508   }
5509 }
5510 
5511 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
5512                                                  dex::TypeIndex dex_index,
5513                                                  Handle<mirror::Class> handle) {
5514   jit_class_roots_.Overwrite(
5515       TypeReference(&dex_file, dex_index), reinterpret_cast64<uint64_t>(handle.GetReference()));
5516   // Add a patch entry and return the label.
5517   jit_class_patches_.emplace_back(dex_file, dex_index.index_);
5518   PatchInfo<Label>* info = &jit_class_patches_.back();
5519   return &info->label;
5520 }
5521 
5522 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5523 // move.
5524 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
5525   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5526   if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
5527     codegen_->GenerateLoadClassRuntimeCall(cls);
5528     return;
5529   }
5530   DCHECK(!cls->NeedsAccessCheck());
5531 
5532   LocationSummary* locations = cls->GetLocations();
5533   Location out_loc = locations->Out();
5534   CpuRegister out = out_loc.AsRegister<CpuRegister>();
5535 
5536   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
5537       ? kWithoutReadBarrier
5538       : kCompilerReadBarrierOption;
5539   bool generate_null_check = false;
5540   switch (load_kind) {
5541     case HLoadClass::LoadKind::kReferrersClass: {
5542       DCHECK(!cls->CanCallRuntime());
5543       DCHECK(!cls->MustGenerateClinitCheck());
5544       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5545       CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
5546       GenerateGcRootFieldLoad(
5547           cls,
5548           out_loc,
5549           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
5550           /* fixup_label */ nullptr,
5551           read_barrier_option);
5552       break;
5553     }
5554     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5555       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
5556       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5557       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
5558       codegen_->RecordBootTypePatch(cls);
5559       break;
5560     case HLoadClass::LoadKind::kBootImageAddress: {
5561       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5562       uint32_t address = dchecked_integral_cast<uint32_t>(
5563           reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
5564       DCHECK_NE(address, 0u);
5565       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
5566       break;
5567     }
5568     case HLoadClass::LoadKind::kBssEntry: {
5569       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5570                                           /* no_rip */ false);
5571       Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
5572       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
5573       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
5574       generate_null_check = true;
5575       break;
5576     }
5577     case HLoadClass::LoadKind::kJitTableAddress: {
5578       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5579                                           /* no_rip */ true);
5580       Label* fixup_label =
5581           codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
5582       // /* GcRoot<mirror::Class> */ out = *address
5583       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
5584       break;
5585     }
5586     default:
5587       LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
5588       UNREACHABLE();
5589   }
5590 
5591   if (generate_null_check || cls->MustGenerateClinitCheck()) {
5592     DCHECK(cls->CanCallRuntime());
5593     SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
5594         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
5595     codegen_->AddSlowPath(slow_path);
5596     if (generate_null_check) {
5597       __ testl(out, out);
5598       __ j(kEqual, slow_path->GetEntryLabel());
5599     }
5600     if (cls->MustGenerateClinitCheck()) {
5601       GenerateClassInitializationCheck(slow_path, out);
5602     } else {
5603       __ Bind(slow_path->GetExitLabel());
5604     }
5605   }
5606 }
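// Note on VisitLoadClass: only the kBssEntry path sets generate_null_check, because the .bss
// slot is null until the type has been resolved; in that case (and whenever a clinit check is
// required) LoadClassSlowPathX86_64 resolves and/or initializes the class at runtime.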
5607 
5608 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
5609   LocationSummary* locations =
5610       new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
5611   locations->SetInAt(0, Location::RequiresRegister());
5612   if (check->HasUses()) {
5613     locations->SetOut(Location::SameAsFirstInput());
5614   }
5615 }
5616 
5617 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
5618   // We assume the class to not be null.
5619   SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
5620       check->GetLoadClass(), check, check->GetDexPc(), true);
5621   codegen_->AddSlowPath(slow_path);
5622   GenerateClassInitializationCheck(slow_path,
5623                                    check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
5624 }
5625 
5626 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
5627     HLoadString::LoadKind desired_string_load_kind) {
5628   switch (desired_string_load_kind) {
5629     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5630       DCHECK(!GetCompilerOptions().GetCompilePic());
5631       // We prefer the always-available RIP-relative address for the x86-64 boot image.
5632       return HLoadString::LoadKind::kBootImageLinkTimePcRelative;
5633     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5634       DCHECK(GetCompilerOptions().GetCompilePic());
5635       break;
5636     case HLoadString::LoadKind::kBootImageAddress:
5637       break;
5638     case HLoadString::LoadKind::kBssEntry:
5639       DCHECK(!Runtime::Current()->UseJitCompilation());
5640       break;
5641     case HLoadString::LoadKind::kJitTableAddress:
5642       DCHECK(Runtime::Current()->UseJitCompilation());
5643       break;
5644     case HLoadString::LoadKind::kDexCacheViaMethod:
5645       break;
5646   }
5647   return desired_string_load_kind;
5648 }
5649 
5650 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
5651   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
5652   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
5653   if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
5654     locations->SetOut(Location::RegisterLocation(RAX));
5655   } else {
5656     locations->SetOut(Location::RequiresRegister());
5657     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
5658       if (!kUseReadBarrier || kUseBakerReadBarrier) {
5659         // Rely on the pResolveString to save everything.
5660         // Custom calling convention: RAX serves as both input and output.
5661         RegisterSet caller_saves = RegisterSet::Empty();
5662         caller_saves.Add(Location::RegisterLocation(RAX));
5663         locations->SetCustomSlowPathCallerSaves(caller_saves);
5664       } else {
5665         // For non-Baker read barrier we have a temp-clobbering call.
5666       }
5667     }
5668   }
5669 }
5670 
5671 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
5672                                                   dex::StringIndex dex_index,
5673                                                   Handle<mirror::String> handle) {
5674   jit_string_roots_.Overwrite(
5675       StringReference(&dex_file, dex_index), reinterpret_cast64<uint64_t>(handle.GetReference()));
5676   // Add a patch entry and return the label.
5677   jit_string_patches_.emplace_back(dex_file, dex_index.index_);
5678   PatchInfo<Label>* info = &jit_string_patches_.back();
5679   return &info->label;
5680 }
5681 
5682 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5683 // move.
5684 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
5685   LocationSummary* locations = load->GetLocations();
5686   Location out_loc = locations->Out();
5687   CpuRegister out = out_loc.AsRegister<CpuRegister>();
5688 
5689   switch (load->GetLoadKind()) {
5690     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5691       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
5692       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
5693       codegen_->RecordBootStringPatch(load);
5694       return;  // No dex cache slow path.
5695     }
5696     case HLoadString::LoadKind::kBootImageAddress: {
5697       uint32_t address = dchecked_integral_cast<uint32_t>(
5698           reinterpret_cast<uintptr_t>(load->GetString().Get()));
5699       DCHECK_NE(address, 0u);
5700       __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
5701       return;  // No dex cache slow path.
5702     }
5703     case HLoadString::LoadKind::kBssEntry: {
5704       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5705                                           /* no_rip */ false);
5706       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
5707       // /* GcRoot<mirror::String> */ out = *address  /* PC-relative */
5708       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
5709       SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
5710       codegen_->AddSlowPath(slow_path);
5711       __ testl(out, out);
5712       __ j(kEqual, slow_path->GetEntryLabel());
5713       __ Bind(slow_path->GetExitLabel());
5714       return;
5715     }
5716     case HLoadString::LoadKind::kJitTableAddress: {
5717       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5718                                           /* no_rip */ true);
5719       Label* fixup_label = codegen_->NewJitRootStringPatch(
5720           load->GetDexFile(), load->GetStringIndex(), load->GetString());
5721       // /* GcRoot<mirror::String> */ out = *address
5722       GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
5723       return;
5724     }
5725     default:
5726       break;
5727   }
5728 
5729   // TODO: Re-add the compiler code to do string dex cache lookup again.
5730   // Custom calling convention: RAX serves as both input and output.
5731   __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
5732   codegen_->InvokeRuntime(kQuickResolveString,
5733                           load,
5734                           load->GetDexPc());
5735   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
5736 }
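// Note on VisitLoadString: the kBssEntry path tests the loaded root for null and lets
// LoadStringSlowPathX86_64 call pResolveString when the string is not yet in its .bss slot;
// the fall-through at the end handles the remaining load kinds (in practice kDexCacheViaMethod)
// with a direct runtime call, passing the string index in RAX.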
5737 
5738 static Address GetExceptionTlsAddress() {
5739   return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
5740                            /* no_rip */ true);
5741 }
5742 
5743 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
5744   LocationSummary* locations =
5745       new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
5746   locations->SetOut(Location::RequiresRegister());
5747 }
5748 
5749 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
5750   __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
5751 }
5752 
5753 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
5754   new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
5755 }
5756 
5757 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
5758   __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
5759 }
5760 
5761 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
5762   LocationSummary* locations =
5763       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
5764   InvokeRuntimeCallingConvention calling_convention;
5765   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5766 }
5767 
5768 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
5769   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
5770   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
5771 }
5772 
5773 static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
5774   if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) {
5775     // We need a temporary for holding the iftable length.
5776     return true;
5777   }
5778   return kEmitCompilerReadBarrier &&
5779       !kUseBakerReadBarrier &&
5780       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5781        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5782        type_check_kind == TypeCheckKind::kArrayObjectCheck);
5783 }
5784 
5785 static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
5786   return kEmitCompilerReadBarrier &&
5787       !kUseBakerReadBarrier &&
5788       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5789        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5790        type_check_kind == TypeCheckKind::kArrayObjectCheck);
5791 }
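// Note on the two helpers above: an extra temporary is only needed when the generated check has
// to chase references itself, i.e. with the non-Baker read barrier configuration (where the
// slow-path reference loads need a spare register) and, when references are not poisoned, for
// the CheckCast interface check that keeps the iftable length in a temp while looping.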
5792 
5793 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5794   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
5795   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5796   bool baker_read_barrier_slow_path = false;
5797   switch (type_check_kind) {
5798     case TypeCheckKind::kExactCheck:
5799     case TypeCheckKind::kAbstractClassCheck:
5800     case TypeCheckKind::kClassHierarchyCheck:
5801     case TypeCheckKind::kArrayObjectCheck:
5802       call_kind =
5803           kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
5804       baker_read_barrier_slow_path = kUseBakerReadBarrier;
5805       break;
5806     case TypeCheckKind::kArrayCheck:
5807     case TypeCheckKind::kUnresolvedCheck:
5808     case TypeCheckKind::kInterfaceCheck:
5809       call_kind = LocationSummary::kCallOnSlowPath;
5810       break;
5811   }
5812 
5813   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
5814   if (baker_read_barrier_slow_path) {
5815     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
5816   }
5817   locations->SetInAt(0, Location::RequiresRegister());
5818   locations->SetInAt(1, Location::Any());
5819   // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
5820   locations->SetOut(Location::RequiresRegister());
5821   // When read barriers are enabled, we need a temporary register for
5822   // some cases.
5823   if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) {
5824     locations->AddTemp(Location::RequiresRegister());
5825   }
5826 }
5827 
5828 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5829   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5830   LocationSummary* locations = instruction->GetLocations();
5831   Location obj_loc = locations->InAt(0);
5832   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5833   Location cls = locations->InAt(1);
5834   Location out_loc = locations->Out();
5835   CpuRegister out = out_loc.AsRegister<CpuRegister>();
5836   Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ?
5837       locations->GetTemp(0) :
5838       Location::NoLocation();
5839   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5840   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5841   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5842   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
5843   SlowPathCode* slow_path = nullptr;
5844   NearLabel done, zero;
5845 
5846   // Return 0 if `obj` is null.
5847   // Avoid null check if we know obj is not null.
5848   if (instruction->MustDoNullCheck()) {
5849     __ testl(obj, obj);
5850     __ j(kEqual, &zero);
5851   }
5852 
5853   switch (type_check_kind) {
5854     case TypeCheckKind::kExactCheck: {
5855       // /* HeapReference<Class> */ out = obj->klass_
5856       GenerateReferenceLoadTwoRegisters(instruction,
5857                                         out_loc,
5858                                         obj_loc,
5859                                         class_offset,
5860                                         kCompilerReadBarrierOption);
5861       if (cls.IsRegister()) {
5862         __ cmpl(out, cls.AsRegister<CpuRegister>());
5863       } else {
5864         DCHECK(cls.IsStackSlot()) << cls;
5865         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5866       }
5867       if (zero.IsLinked()) {
5868         // Classes must be equal for the instanceof to succeed.
5869         __ j(kNotEqual, &zero);
5870         __ movl(out, Immediate(1));
5871         __ jmp(&done);
5872       } else {
5873         __ setcc(kEqual, out);
5874         // setcc only sets the low byte.
5875         __ andl(out, Immediate(1));
5876       }
5877       break;
5878     }
5879 
5880     case TypeCheckKind::kAbstractClassCheck: {
5881       // /* HeapReference<Class> */ out = obj->klass_
5882       GenerateReferenceLoadTwoRegisters(instruction,
5883                                         out_loc,
5884                                         obj_loc,
5885                                         class_offset,
5886                                         kCompilerReadBarrierOption);
5887       // If the class is abstract, we eagerly fetch the super class of the
5888       // object to avoid doing a comparison we know will fail.
5889       NearLabel loop, success;
5890       __ Bind(&loop);
5891       // /* HeapReference<Class> */ out = out->super_class_
5892       GenerateReferenceLoadOneRegister(instruction,
5893                                        out_loc,
5894                                        super_offset,
5895                                        maybe_temp_loc,
5896                                        kCompilerReadBarrierOption);
5897       __ testl(out, out);
5898       // If `out` is null, we use it for the result, and jump to `done`.
5899       __ j(kEqual, &done);
5900       if (cls.IsRegister()) {
5901         __ cmpl(out, cls.AsRegister<CpuRegister>());
5902       } else {
5903         DCHECK(cls.IsStackSlot()) << cls;
5904         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5905       }
5906       __ j(kNotEqual, &loop);
5907       __ movl(out, Immediate(1));
5908       if (zero.IsLinked()) {
5909         __ jmp(&done);
5910       }
5911       break;
5912     }
5913 
5914     case TypeCheckKind::kClassHierarchyCheck: {
5915       // /* HeapReference<Class> */ out = obj->klass_
5916       GenerateReferenceLoadTwoRegisters(instruction,
5917                                         out_loc,
5918                                         obj_loc,
5919                                         class_offset,
5920                                         kCompilerReadBarrierOption);
5921       // Walk over the class hierarchy to find a match.
5922       NearLabel loop, success;
5923       __ Bind(&loop);
5924       if (cls.IsRegister()) {
5925         __ cmpl(out, cls.AsRegister<CpuRegister>());
5926       } else {
5927         DCHECK(cls.IsStackSlot()) << cls;
5928         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5929       }
5930       __ j(kEqual, &success);
5931       // /* HeapReference<Class> */ out = out->super_class_
5932       GenerateReferenceLoadOneRegister(instruction,
5933                                        out_loc,
5934                                        super_offset,
5935                                        maybe_temp_loc,
5936                                        kCompilerReadBarrierOption);
5937       __ testl(out, out);
5938       __ j(kNotEqual, &loop);
5939       // If `out` is null, we use it for the result, and jump to `done`.
5940       __ jmp(&done);
5941       __ Bind(&success);
5942       __ movl(out, Immediate(1));
5943       if (zero.IsLinked()) {
5944         __ jmp(&done);
5945       }
5946       break;
5947     }
5948 
5949     case TypeCheckKind::kArrayObjectCheck: {
5950       // /* HeapReference<Class> */ out = obj->klass_
5951       GenerateReferenceLoadTwoRegisters(instruction,
5952                                         out_loc,
5953                                         obj_loc,
5954                                         class_offset,
5955                                         kCompilerReadBarrierOption);
5956       // Do an exact check.
5957       NearLabel exact_check;
5958       if (cls.IsRegister()) {
5959         __ cmpl(out, cls.AsRegister<CpuRegister>());
5960       } else {
5961         DCHECK(cls.IsStackSlot()) << cls;
5962         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5963       }
5964       __ j(kEqual, &exact_check);
5965       // Otherwise, we need to check that the object's class is a non-primitive array.
5966       // /* HeapReference<Class> */ out = out->component_type_
5967       GenerateReferenceLoadOneRegister(instruction,
5968                                        out_loc,
5969                                        component_offset,
5970                                        maybe_temp_loc,
5971                                        kCompilerReadBarrierOption);
5972       __ testl(out, out);
5973       // If `out` is null, we use it for the result, and jump to `done`.
5974       __ j(kEqual, &done);
5975       __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
5976       __ j(kNotEqual, &zero);
5977       __ Bind(&exact_check);
5978       __ movl(out, Immediate(1));
5979       __ jmp(&done);
5980       break;
5981     }
5982 
5983     case TypeCheckKind::kArrayCheck: {
5984       // No read barrier since the slow path will retry upon failure.
5985       // /* HeapReference<Class> */ out = obj->klass_
5986       GenerateReferenceLoadTwoRegisters(instruction,
5987                                         out_loc,
5988                                         obj_loc,
5989                                         class_offset,
5990                                         kWithoutReadBarrier);
5991       if (cls.IsRegister()) {
5992         __ cmpl(out, cls.AsRegister<CpuRegister>());
5993       } else {
5994         DCHECK(cls.IsStackSlot()) << cls;
5995         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5996       }
5997       DCHECK(locations->OnlyCallsOnSlowPath());
5998       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5999                                                                        /* is_fatal */ false);
6000       codegen_->AddSlowPath(slow_path);
6001       __ j(kNotEqual, slow_path->GetEntryLabel());
6002       __ movl(out, Immediate(1));
6003       if (zero.IsLinked()) {
6004         __ jmp(&done);
6005       }
6006       break;
6007     }
6008 
6009     case TypeCheckKind::kUnresolvedCheck:
6010     case TypeCheckKind::kInterfaceCheck: {
6011       // Note that we indeed only call on slow path, but we always go
6012       // into the slow path for the unresolved and interface check
6013       // cases.
6014       //
6015       // We cannot directly call the InstanceofNonTrivial runtime
6016       // entry point without resorting to a type checking slow path
6017       // here (i.e. by calling InvokeRuntime directly), as it would
6018       // require to assign fixed registers for the inputs of this
6019       // HInstanceOf instruction (following the runtime calling
6020       // convention), which might be cluttered by the potential first
6021       // read barrier emission at the beginning of this method.
6022       //
6023       // TODO: Introduce a new runtime entry point taking the object
6024       // to test (instead of its class) as argument, and let it deal
6025       // with the read barrier issues. This will let us refactor this
6026       // case of the `switch` code as it was previously (with a direct
6027       // call to the runtime not using a type checking slow path).
6028       // This should also be beneficial for the other cases above.
6029       DCHECK(locations->OnlyCallsOnSlowPath());
6030       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
6031                                                                        /* is_fatal */ false);
6032       codegen_->AddSlowPath(slow_path);
6033       __ jmp(slow_path->GetEntryLabel());
6034       if (zero.IsLinked()) {
6035         __ jmp(&done);
6036       }
6037       break;
6038     }
6039   }
6040 
6041   if (zero.IsLinked()) {
6042     __ Bind(&zero);
6043     __ xorl(out, out);
6044   }
6045 
6046   if (done.IsLinked()) {
6047     __ Bind(&done);
6048   }
6049 
6050   if (slow_path != nullptr) {
6051     __ Bind(slow_path->GetExitLabel());
6052   }
6053 }
6054 
6055 static bool IsTypeCheckSlowPathFatal(TypeCheckKind type_check_kind, bool throws_into_catch) {
6056   switch (type_check_kind) {
6057     case TypeCheckKind::kExactCheck:
6058     case TypeCheckKind::kAbstractClassCheck:
6059     case TypeCheckKind::kClassHierarchyCheck:
6060     case TypeCheckKind::kArrayObjectCheck:
6061       return !throws_into_catch && !kEmitCompilerReadBarrier;
6062     case TypeCheckKind::kInterfaceCheck:
6063       return !throws_into_catch && !kEmitCompilerReadBarrier && !kPoisonHeapReferences;
6064     case TypeCheckKind::kArrayCheck:
6065     case TypeCheckKind::kUnresolvedCheck:
6066       return false;
6067   }
6068   LOG(FATAL) << "Unreachable";
6069   UNREACHABLE();
6070 }
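// Note on IsTypeCheckSlowPathFatal: a "fatal" type check slow path always throws and never
// returns, so no live registers need saving and the check can stay kNoCall. It cannot be fatal
// when the exception may be caught in the same method, when read barriers could make the inline
// test report a false negative that the runtime must re-check, or (for interface checks) when
// heap references are poisoned.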
6071 
6072 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
6073   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
6074   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6075   bool is_fatal_slow_path = IsTypeCheckSlowPathFatal(type_check_kind, throws_into_catch);
6076   LocationSummary::CallKind call_kind = is_fatal_slow_path
6077                                             ? LocationSummary::kNoCall
6078                                             : LocationSummary::kCallOnSlowPath;
6079   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
6080   locations->SetInAt(0, Location::RequiresRegister());
6081   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6082     // Require a register for the interface check since there is a loop that compares the class to
6083     // a memory address.
6084     locations->SetInAt(1, Location::RequiresRegister());
6085   } else {
6086     locations->SetInAt(1, Location::Any());
6087   }
6088 
6089   // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
6090   locations->AddTemp(Location::RequiresRegister());
6091   // When read barriers are enabled, we need an additional temporary
6092   // register for some cases.
6093   if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) {
6094     locations->AddTemp(Location::RequiresRegister());
6095   }
6096 }
6097 
6098 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
6099   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6100   LocationSummary* locations = instruction->GetLocations();
6101   Location obj_loc = locations->InAt(0);
6102   CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6103   Location cls = locations->InAt(1);
6104   Location temp_loc = locations->GetTemp(0);
6105   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
6106   Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ?
6107       locations->GetTemp(1) :
6108       Location::NoLocation();
6109   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6110   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6111   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6112   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6113   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
6114   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
6115   const uint32_t object_array_data_offset =
6116       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
6117 
6118   // The slow path is never fatal when read barriers are enabled, since we may need to go to the
6119   // entrypoint for non-fatal cases caused by false negatives. The false negatives may come from
6120   // avoiding read barriers below, which is done for performance and code size reasons.
6121   bool is_type_check_slow_path_fatal =
6122       IsTypeCheckSlowPathFatal(type_check_kind, instruction->CanThrowIntoCatchBlock());
6123   SlowPathCode* type_check_slow_path =
6124       new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
6125                                                            is_type_check_slow_path_fatal);
6126   codegen_->AddSlowPath(type_check_slow_path);
6127 
6128 
6129   NearLabel done;
6130   // Avoid null check if we know obj is not null.
6131   if (instruction->MustDoNullCheck()) {
6132     __ testl(obj, obj);
6133     __ j(kEqual, &done);
6134   }
6135 
6136   switch (type_check_kind) {
6137     case TypeCheckKind::kExactCheck:
6138     case TypeCheckKind::kArrayCheck: {
6139       // /* HeapReference<Class> */ temp = obj->klass_
6140       GenerateReferenceLoadTwoRegisters(instruction,
6141                                         temp_loc,
6142                                         obj_loc,
6143                                         class_offset,
6144                                         kWithoutReadBarrier);
6145       if (cls.IsRegister()) {
6146         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6147       } else {
6148         DCHECK(cls.IsStackSlot()) << cls;
6149         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6150       }
6151       // Jump to slow path for throwing the exception or doing a
6152       // more involved array check.
6153       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6154       break;
6155     }
6156 
6157     case TypeCheckKind::kAbstractClassCheck: {
6158       // /* HeapReference<Class> */ temp = obj->klass_
6159       GenerateReferenceLoadTwoRegisters(instruction,
6160                                         temp_loc,
6161                                         obj_loc,
6162                                         class_offset,
6163                                         kWithoutReadBarrier);
6164       // If the class is abstract, we eagerly fetch the super class of the
6165       // object to avoid doing a comparison we know will fail.
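      // Roughly, the loop emitted below behaves like the following sketch
      // (illustrative pseudo-code, not the literal emitted instructions):
      //
      //   do {
      //     temp = temp->super_class_;
      //     if (temp == null) goto slow_path;  // Not a subclass: throw.
      //   } while (temp != cls);               // Match: fall through, cast succeeds.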
6166       NearLabel loop;
6167       __ Bind(&loop);
6168       // /* HeapReference<Class> */ temp = temp->super_class_
6169       GenerateReferenceLoadOneRegister(instruction,
6170                                        temp_loc,
6171                                        super_offset,
6172                                        maybe_temp2_loc,
6173                                        kWithoutReadBarrier);
6174 
6175       // If the class reference currently in `temp` is null, jump to the slow path to throw the
6176       // exception.
6177       __ testl(temp, temp);
6178       // Otherwise, compare the classes.
6179       __ j(kZero, type_check_slow_path->GetEntryLabel());
6180       if (cls.IsRegister()) {
6181         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6182       } else {
6183         DCHECK(cls.IsStackSlot()) << cls;
6184         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6185       }
6186       __ j(kNotEqual, &loop);
6187       break;
6188     }
6189 
6190     case TypeCheckKind::kClassHierarchyCheck: {
6191       // /* HeapReference<Class> */ temp = obj->klass_
6192       GenerateReferenceLoadTwoRegisters(instruction,
6193                                         temp_loc,
6194                                         obj_loc,
6195                                         class_offset,
6196                                         kWithoutReadBarrier);
6197       // Walk over the class hierarchy to find a match.
6198       NearLabel loop;
6199       __ Bind(&loop);
6200       if (cls.IsRegister()) {
6201         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6202       } else {
6203         DCHECK(cls.IsStackSlot()) << cls;
6204         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6205       }
6206       __ j(kEqual, &done);
6207 
6208       // /* HeapReference<Class> */ temp = temp->super_class_
6209       GenerateReferenceLoadOneRegister(instruction,
6210                                        temp_loc,
6211                                        super_offset,
6212                                        maybe_temp2_loc,
6213                                        kWithoutReadBarrier);
6214 
6215       // If the class reference currently in `temp` is not null, jump
6216       // back to the beginning of the loop.
6217       __ testl(temp, temp);
6218       __ j(kNotZero, &loop);
6219       // Otherwise, jump to the slow path to throw the exception.
6220       __ jmp(type_check_slow_path->GetEntryLabel());
6221       break;
6222     }
6223 
6224     case TypeCheckKind::kArrayObjectCheck: {
6225       // /* HeapReference<Class> */ temp = obj->klass_
6226       GenerateReferenceLoadTwoRegisters(instruction,
6227                                         temp_loc,
6228                                         obj_loc,
6229                                         class_offset,
6230                                         kWithoutReadBarrier);
6231       // Do an exact check.
6233       if (cls.IsRegister()) {
6234         __ cmpl(temp, cls.AsRegister<CpuRegister>());
6235       } else {
6236         DCHECK(cls.IsStackSlot()) << cls;
6237         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6238       }
6239       __ j(kEqual, &done);
6240 
6241       // Otherwise, we need to check that the object's class is a non-primitive array.
6242       // /* HeapReference<Class> */ temp = temp->component_type_
6243       GenerateReferenceLoadOneRegister(instruction,
6244                                        temp_loc,
6245                                        component_offset,
6246                                        maybe_temp2_loc,
6247                                        kWithoutReadBarrier);
6248 
6249       // If the component type is not null (i.e. the object is indeed
6250       // an array), fall through to further check that this component
6251       // type is not a primitive type.
6253       __ testl(temp, temp);
6254       // Otherwise, jump to the slow path to throw the exception.
6255       __ j(kZero, type_check_slow_path->GetEntryLabel());
6256       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
6257       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6258       break;
6259     }
6260 
6261     case TypeCheckKind::kUnresolvedCheck: {
6262       // We always go into the type check slow path for the unresolved case.
6263       //
6264       // We cannot directly call the CheckCast runtime entry point
6265       // without resorting to a type checking slow path here (i.e. by
6266       // calling InvokeRuntime directly), as it would require to
6267       // assign fixed registers for the inputs of this HInstanceOf
6268       // instruction (following the runtime calling convention), which
6269       // might be cluttered by the potential first read barrier
6270       // emission at the beginning of this method.
6271       __ jmp(type_check_slow_path->GetEntryLabel());
6272       break;
6273     }
6274 
6275     case TypeCheckKind::kInterfaceCheck:
6276       // Fast path for the interface check. We always take the slow path when heap poisoning is
6277       // enabled, since unpoisoning `cls` would require an extra temp.
6278       if (!kPoisonHeapReferences) {
6279         // Try to avoid read barriers to improve the fast path. We cannot get false positives by
6280         // doing this.
6281         // /* HeapReference<Class> */ temp = obj->klass_
6282         GenerateReferenceLoadTwoRegisters(instruction,
6283                                           temp_loc,
6284                                           obj_loc,
6285                                           class_offset,
6286                                           kWithoutReadBarrier);
6287 
6288         // /* HeapReference<Class> */ temp = temp->iftable_
6289         GenerateReferenceLoadTwoRegisters(instruction,
6290                                           temp_loc,
6291                                           temp_loc,
6292                                           iftable_offset,
6293                                           kWithoutReadBarrier);
6294         // Iftable is never null.
6295         __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
6296         // Loop through the iftable and check if any class matches.
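        // Approximate shape of the emitted loop (illustrative pseudo-code). Each
        // iftable entry occupies two slots (the interface class and its method
        // array), which is why the index moves in steps of two; subtracting before
        // the range check lets an empty table fall straight into the slow path:
        //
        //   for (int32_t i = iftable->length - 2; ; i -= 2) {
        //     if (i < 0) goto slow_path;        // No interface matched: throw.
        //     if (iftable[i] == cls) break;     // Match: cast succeeds.
        //   }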
6297         NearLabel start_loop;
6298         __ Bind(&start_loop);
6299         // Need to subtract first to handle the empty array case.
6300         __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
6301         __ j(kNegative, type_check_slow_path->GetEntryLabel());
6302         // Go to next interface if the classes do not match.
6303         __ cmpl(cls.AsRegister<CpuRegister>(),
6304                 CodeGeneratorX86_64::ArrayAddress(temp,
6305                                                   maybe_temp2_loc,
6306                                                   TIMES_4,
6307                                                   object_array_data_offset));
6308         __ j(kNotEqual, &start_loop);  // Fall through (cast succeeds) if the classes match.
6309       } else {
6310         __ jmp(type_check_slow_path->GetEntryLabel());
6311       }
6312       break;
6313   }
6314 
6315   if (done.IsLinked()) {
6316     __ Bind(&done);
6317   }
6318 
6319   __ Bind(type_check_slow_path->GetExitLabel());
6320 }
6321 
6322 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6323   LocationSummary* locations =
6324       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
6325   InvokeRuntimeCallingConvention calling_convention;
6326   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6327 }
6328 
6329 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6330   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
6331                           instruction,
6332                           instruction->GetDexPc());
6333   if (instruction->IsEnter()) {
6334     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
6335   } else {
6336     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
6337   }
6338 }
6339 
6340 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
6341 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
6342 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
6343 
6344 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6345   LocationSummary* locations =
6346       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
6347   DCHECK(instruction->GetResultType() == Primitive::kPrimInt
6348          || instruction->GetResultType() == Primitive::kPrimLong);
6349   locations->SetInAt(0, Location::RequiresRegister());
6350   locations->SetInAt(1, Location::Any());
6351   locations->SetOut(Location::SameAsFirstInput());
6352 }
6353 
6354 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
6355   HandleBitwiseOperation(instruction);
6356 }
6357 
6358 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
6359   HandleBitwiseOperation(instruction);
6360 }
6361 
6362 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
6363   HandleBitwiseOperation(instruction);
6364 }
6365 
6366 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6367   LocationSummary* locations = instruction->GetLocations();
6368   Location first = locations->InAt(0);
6369   Location second = locations->InAt(1);
6370   DCHECK(first.Equals(locations->Out()));
6371 
6372   if (instruction->GetResultType() == Primitive::kPrimInt) {
6373     if (second.IsRegister()) {
6374       if (instruction->IsAnd()) {
6375         __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6376       } else if (instruction->IsOr()) {
6377         __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6378       } else {
6379         DCHECK(instruction->IsXor());
6380         __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6381       }
6382     } else if (second.IsConstant()) {
6383       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
6384       if (instruction->IsAnd()) {
6385         __ andl(first.AsRegister<CpuRegister>(), imm);
6386       } else if (instruction->IsOr()) {
6387         __ orl(first.AsRegister<CpuRegister>(), imm);
6388       } else {
6389         DCHECK(instruction->IsXor());
6390         __ xorl(first.AsRegister<CpuRegister>(), imm);
6391       }
6392     } else {
6393       Address address(CpuRegister(RSP), second.GetStackIndex());
6394       if (instruction->IsAnd()) {
6395         __ andl(first.AsRegister<CpuRegister>(), address);
6396       } else if (instruction->IsOr()) {
6397         __ orl(first.AsRegister<CpuRegister>(), address);
6398       } else {
6399         DCHECK(instruction->IsXor());
6400         __ xorl(first.AsRegister<CpuRegister>(), address);
6401       }
6402     }
6403   } else {
6404     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
6405     CpuRegister first_reg = first.AsRegister<CpuRegister>();
6406     bool second_is_constant = false;
6407     int64_t value = 0;
6408     if (second.IsConstant()) {
6409       second_is_constant = true;
6410       value = second.GetConstant()->AsLongConstant()->GetValue();
6411     }
6412     bool is_int32_value = IsInt<32>(value);
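    // For example, a mask like 0xFFFFFFFF00000000 cannot be encoded as a
    // sign-extended 32-bit immediate, so such constants are routed through the
    // RIP-addressed constant area via LiteralInt64Address() below.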
6413 
6414     if (instruction->IsAnd()) {
6415       if (second_is_constant) {
6416         if (is_int32_value) {
6417           __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
6418         } else {
6419           __ andq(first_reg, codegen_->LiteralInt64Address(value));
6420         }
6421       } else if (second.IsDoubleStackSlot()) {
6422         __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6423       } else {
6424         __ andq(first_reg, second.AsRegister<CpuRegister>());
6425       }
6426     } else if (instruction->IsOr()) {
6427       if (second_is_constant) {
6428         if (is_int32_value) {
6429           __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
6430         } else {
6431           __ orq(first_reg, codegen_->LiteralInt64Address(value));
6432         }
6433       } else if (second.IsDoubleStackSlot()) {
6434         __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6435       } else {
6436         __ orq(first_reg, second.AsRegister<CpuRegister>());
6437       }
6438     } else {
6439       DCHECK(instruction->IsXor());
6440       if (second_is_constant) {
6441         if (is_int32_value) {
6442           __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
6443         } else {
6444           __ xorq(first_reg, codegen_->LiteralInt64Address(value));
6445         }
6446       } else if (second.IsDoubleStackSlot()) {
6447         __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6448       } else {
6449         __ xorq(first_reg, second.AsRegister<CpuRegister>());
6450       }
6451     }
6452   }
6453 }
6454 
6455 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
6456     HInstruction* instruction,
6457     Location out,
6458     uint32_t offset,
6459     Location maybe_temp,
6460     ReadBarrierOption read_barrier_option) {
6461   CpuRegister out_reg = out.AsRegister<CpuRegister>();
6462   if (read_barrier_option == kWithReadBarrier) {
6463     CHECK(kEmitCompilerReadBarrier);
6464     if (kUseBakerReadBarrier) {
6465       // Load with fast path based Baker's read barrier.
6466       // /* HeapReference<Object> */ out = *(out + offset)
6467       codegen_->GenerateFieldLoadWithBakerReadBarrier(
6468           instruction, out, out_reg, offset, /* needs_null_check */ false);
6469     } else {
6470       // Load with slow path based read barrier.
6471       // Save the value of `out` into `maybe_temp` before overwriting it
6472       // in the following move operation, as we will need it for the
6473       // read barrier below.
6474       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
6475       __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
6476       // /* HeapReference<Object> */ out = *(out + offset)
6477       __ movl(out_reg, Address(out_reg, offset));
6478       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
6479     }
6480   } else {
6481     // Plain load with no read barrier.
6482     // /* HeapReference<Object> */ out = *(out + offset)
6483     __ movl(out_reg, Address(out_reg, offset));
6484     __ MaybeUnpoisonHeapReference(out_reg);
6485   }
6486 }
6487 
6488 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
6489     HInstruction* instruction,
6490     Location out,
6491     Location obj,
6492     uint32_t offset,
6493     ReadBarrierOption read_barrier_option) {
6494   CpuRegister out_reg = out.AsRegister<CpuRegister>();
6495   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
6496   if (read_barrier_option == kWithReadBarrier) {
6497     CHECK(kEmitCompilerReadBarrier);
6498     if (kUseBakerReadBarrier) {
6499       // Load with fast path based Baker's read barrier.
6500       // /* HeapReference<Object> */ out = *(obj + offset)
6501       codegen_->GenerateFieldLoadWithBakerReadBarrier(
6502           instruction, out, obj_reg, offset, /* needs_null_check */ false);
6503     } else {
6504       // Load with slow path based read barrier.
6505       // /* HeapReference<Object> */ out = *(obj + offset)
6506       __ movl(out_reg, Address(obj_reg, offset));
6507       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
6508     }
6509   } else {
6510     // Plain load with no read barrier.
6511     // /* HeapReference<Object> */ out = *(obj + offset)
6512     __ movl(out_reg, Address(obj_reg, offset));
6513     __ MaybeUnpoisonHeapReference(out_reg);
6514   }
6515 }
6516 
6517 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
6518     HInstruction* instruction,
6519     Location root,
6520     const Address& address,
6521     Label* fixup_label,
6522     ReadBarrierOption read_barrier_option) {
6523   CpuRegister root_reg = root.AsRegister<CpuRegister>();
6524   if (read_barrier_option == kWithReadBarrier) {
6525     DCHECK(kEmitCompilerReadBarrier);
6526     if (kUseBakerReadBarrier) {
6527       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
6528       // Baker's read barriers are used:
6529       //
6530       //   root = obj.field;
6531       //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
6532       //   if (temp != null) {
6533       //     root = temp(root)
6534       //   }
6535 
6536       // /* GcRoot<mirror::Object> */ root = *address
6537       __ movl(root_reg, address);
6538       if (fixup_label != nullptr) {
6539         __ Bind(fixup_label);
6540       }
6541       static_assert(
6542           sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
6543           "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
6544           "have different sizes.");
6545       static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
6546                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
6547                     "have different sizes.");
6548 
6549       // Slow path marking the GC root `root`.
6550       SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
6551           instruction, root, /* unpoison_ref_before_marking */ false);
6552       codegen_->AddSlowPath(slow_path);
6553 
6554       // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
6555       const int32_t entry_point_offset =
6556           CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
6557       __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip */ true), Immediate(0));
6558       // The entrypoint is null when the GC is not marking.
6559       __ j(kNotEqual, slow_path->GetEntryLabel());
6560       __ Bind(slow_path->GetExitLabel());
6561     } else {
6562       // GC root loaded through a slow path for read barriers other
6563       // than Baker's.
6564       // /* GcRoot<mirror::Object>* */ root = address
6565       __ leaq(root_reg, address);
6566       if (fixup_label != nullptr) {
6567         __ Bind(fixup_label);
6568       }
6569       // /* mirror::Object* */ root = root->Read()
6570       codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
6571     }
6572   } else {
6573     // Plain GC root load with no read barrier.
6574     // /* GcRoot<mirror::Object> */ root = *address
6575     __ movl(root_reg, address);
6576     if (fixup_label != nullptr) {
6577       __ Bind(fixup_label);
6578     }
6579     // Note that GC roots are not affected by heap poisoning, thus we
6580     // do not have to unpoison `root_reg` here.
6581   }
6582 }
6583 
6584 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6585                                                                 Location ref,
6586                                                                 CpuRegister obj,
6587                                                                 uint32_t offset,
6588                                                                 bool needs_null_check) {
6589   DCHECK(kEmitCompilerReadBarrier);
6590   DCHECK(kUseBakerReadBarrier);
6591 
6592   // /* HeapReference<Object> */ ref = *(obj + offset)
6593   Address src(obj, offset);
6594   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
6595 }
6596 
6597 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
6598                                                                 Location ref,
6599                                                                 CpuRegister obj,
6600                                                                 uint32_t data_offset,
6601                                                                 Location index,
6602                                                                 bool needs_null_check) {
6603   DCHECK(kEmitCompilerReadBarrier);
6604   DCHECK(kUseBakerReadBarrier);
6605 
6606   static_assert(
6607       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6608       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6609   // /* HeapReference<Object> */ ref =
6610   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
6611   Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
6612   GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
6613 }
6614 
6615 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
6616                                                                     Location ref,
6617                                                                     CpuRegister obj,
6618                                                                     const Address& src,
6619                                                                     bool needs_null_check,
6620                                                                     bool always_update_field,
6621                                                                     CpuRegister* temp1,
6622                                                                     CpuRegister* temp2) {
6623   DCHECK(kEmitCompilerReadBarrier);
6624   DCHECK(kUseBakerReadBarrier);
6625 
6626   // In slow path based read barriers, the read barrier call is
6627   // inserted after the original load. However, in fast path based
6628   // Baker's read barriers, we need to perform the load of
6629   // mirror::Object::monitor_ *before* the original reference load.
6630   // This load-load ordering is required by the read barrier.
6631   // The fast path/slow path (for Baker's algorithm) should look like:
6632   //
6633   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
6634   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
6635   //   HeapReference<Object> ref = *src;  // Original reference load.
6636   //   bool is_gray = (rb_state == ReadBarrier::GrayState());
6637   //   if (is_gray) {
6638   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
6639   //   }
6640   //
6641   // Note: the original implementation in ReadBarrier::Barrier is
6642   // slightly more complex as:
6643   // - it implements the load-load fence using a data dependency on
6644   //   the high-bits of rb_state, which are expected to be all zeroes
6645   //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
6646   //   here, which is a no-op thanks to the x86-64 memory model);
6647   // - it performs additional checks that we do not do here for
6648   //   performance reasons.
6649 
6650   CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
6651   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
6652 
6653   // Given the numeric representation, it's enough to check the low bit of the rb_state.
6654   static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
6655   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
6656   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
6657   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
6658   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
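  // Worked example (purely illustrative; the real constants come from LockWord):
  // if kReadBarrierStateShift were 28, the gray bit would live in byte 28 / 8 = 3
  // of the lock word and at bit 28 % 8 = 4 within that byte, so the testb below
  // would read monitor_ + 3 and test it against the immediate 1 << 4 = 0x10.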
6659 
6660   // if (rb_state == ReadBarrier::GrayState())
6661   //   ref = ReadBarrier::Mark(ref);
6662   // At this point, just do the "if" and make sure that flags are preserved until the branch.
6663   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
6664   if (needs_null_check) {
6665     MaybeRecordImplicitNullCheck(instruction);
6666   }
6667 
6668   // Load fence to prevent load-load reordering.
6669   // Note that this is a no-op, thanks to the x86-64 memory model.
6670   GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6671 
6672   // The actual reference load.
6673   // /* HeapReference<Object> */ ref = *src
6674   __ movl(ref_reg, src);  // Flags are unaffected.
6675 
6676   // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
6677   // Slow path marking the object `ref` when it is gray.
6678   SlowPathCode* slow_path;
6679   if (always_update_field) {
6680     DCHECK(temp1 != nullptr);
6681     DCHECK(temp2 != nullptr);
6682     slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
6683         instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2);
6684   } else {
6685     slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
6686         instruction, ref, /* unpoison_ref_before_marking */ true);
6687   }
6688   AddSlowPath(slow_path);
6689 
6690   // We have done the "if" of the gray bit check above, now branch based on the flags.
6691   __ j(kNotZero, slow_path->GetEntryLabel());
6692 
6693   // Object* ref = ref_addr->AsMirrorPtr()
6694   __ MaybeUnpoisonHeapReference(ref_reg);
6695 
6696   __ Bind(slow_path->GetExitLabel());
6697 }
6698 
6699 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
6700                                                   Location out,
6701                                                   Location ref,
6702                                                   Location obj,
6703                                                   uint32_t offset,
6704                                                   Location index) {
6705   DCHECK(kEmitCompilerReadBarrier);
6706 
6707   // Insert a slow path based read barrier *after* the reference load.
6708   //
6709   // If heap poisoning is enabled, the unpoisoning of the loaded
6710   // reference will be carried out by the runtime within the slow
6711   // path.
6712   //
6713   // Note that `ref` currently does not get unpoisoned (when heap
6714   // poisoning is enabled), which is alright as the `ref` argument is
6715   // not used by the artReadBarrierSlow entry point.
6716   //
6717   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6718   SlowPathCode* slow_path = new (GetGraph()->GetArena())
6719       ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
6720   AddSlowPath(slow_path);
6721 
6722   __ jmp(slow_path->GetEntryLabel());
6723   __ Bind(slow_path->GetExitLabel());
6724 }
6725 
6726 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6727                                                        Location out,
6728                                                        Location ref,
6729                                                        Location obj,
6730                                                        uint32_t offset,
6731                                                        Location index) {
6732   if (kEmitCompilerReadBarrier) {
6733     // Baker's read barriers shall be handled by the fast path
6734     // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
6735     DCHECK(!kUseBakerReadBarrier);
6736     // If heap poisoning is enabled, unpoisoning will be taken care of
6737     // by the runtime within the slow path.
6738     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6739   } else if (kPoisonHeapReferences) {
6740     __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
6741   }
6742 }
6743 
6744 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6745                                                          Location out,
6746                                                          Location root) {
6747   DCHECK(kEmitCompilerReadBarrier);
6748 
6749   // Insert a slow path based read barrier *after* the GC root load.
6750   //
6751   // Note that GC roots are not affected by heap poisoning, so we do
6752   // not need to do anything special for this here.
6753   SlowPathCode* slow_path =
6754       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
6755   AddSlowPath(slow_path);
6756 
6757   __ jmp(slow_path->GetEntryLabel());
6758   __ Bind(slow_path->GetExitLabel());
6759 }
6760 
6761 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6762   // Nothing to do, this should be removed during prepare for register allocator.
6763   LOG(FATAL) << "Unreachable";
6764 }
6765 
6766 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6767   // Nothing to do, this should be removed during prepare for register allocator.
6768   LOG(FATAL) << "Unreachable";
6769 }
6770 
6771 // Simple implementation of packed switch - generate cascaded compare/jumps.
6772 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6773   LocationSummary* locations =
6774       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
6775   locations->SetInAt(0, Location::RequiresRegister());
6776   locations->AddTemp(Location::RequiresRegister());
6777   locations->AddTemp(Location::RequiresRegister());
6778 }
6779 
6780 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6781   int32_t lower_bound = switch_instr->GetStartValue();
6782   uint32_t num_entries = switch_instr->GetNumEntries();
6783   LocationSummary* locations = switch_instr->GetLocations();
6784   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
6785   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
6786   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
6787   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
6788 
6789   // Should we generate smaller inline compare/jumps?
6790   if (num_entries <= kPackedSwitchJumpTableThreshold) {
6791     // Figure out the correct compare values and jump conditions.
6792     // Handle the first compare/branch as a special case because it might
6793     // jump to the default case.
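    // Illustrative sketch of the emitted cascade for lower_bound == 0 and three
    // entries with successor blocks S0, S1, S2 and default block D:
    //
    //   cmpl $1, value
    //   jb   S0          // value == 0
    //   je   S1          // value == 1
    //   cmpl $2, value
    //   je   S2          // value == 2
    //   jmp  D           // everything else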
6794     DCHECK_GT(num_entries, 2u);
6795     Condition first_condition;
6796     uint32_t index;
6797     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
6798     if (lower_bound != 0) {
6799       first_condition = kLess;
6800       __ cmpl(value_reg_in, Immediate(lower_bound));
6801       __ j(first_condition, codegen_->GetLabelOf(default_block));
6802       __ j(kEqual, codegen_->GetLabelOf(successors[0]));
6803 
6804       index = 1;
6805     } else {
6806       // Handle all the compare/jumps below.
6807       first_condition = kBelow;
6808       index = 0;
6809     }
6810 
6811     // Handle the rest of the compare/jumps.
6812     for (; index + 1 < num_entries; index += 2) {
6813       int32_t compare_to_value = lower_bound + index + 1;
6814       __ cmpl(value_reg_in, Immediate(compare_to_value));
6815       // Jump to successors[index] if value < case_value[index].
6816       __ j(first_condition, codegen_->GetLabelOf(successors[index]));
6817       // Jump to successors[index + 1] if value == case_value[index + 1].
6818       __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
6819     }
6820 
6821     if (index != num_entries) {
6822       // There are an odd number of entries. Handle the last one.
6823       DCHECK_EQ(index + 1, num_entries);
6824       __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
6825       __ j(kEqual, codegen_->GetLabelOf(successors[index]));
6826     }
6827 
6828     // And the default for any other value.
6829     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
6830       __ jmp(codegen_->GetLabelOf(default_block));
6831     }
6832     return;
6833   }
6834 
6835   // Remove the bias, if needed.
6836   Register value_reg_out = value_reg_in.AsRegister();
6837   if (lower_bound != 0) {
6838     __ leal(temp_reg, Address(value_reg_in, -lower_bound));
6839     value_reg_out = temp_reg.AsRegister();
6840   }
6841   CpuRegister value_reg(value_reg_out);
6842 
6843   // Is the value in range?
6844   __ cmpl(value_reg, Immediate(num_entries - 1));
6845   __ j(kAbove, codegen_->GetLabelOf(default_block));
6846 
6847   // We are in the range of the table.
6848   // Load the address of the jump table in the constant area.
6849   __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
6850 
6851   // Load the (signed) offset from the jump table.
6852   __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
6853 
6854   // Add the offset to the address of the table base.
6855   __ addq(temp_reg, base_reg);
6856 
6857   // And jump.
6858   __ jmp(temp_reg);
6859 }
6860 
6861 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
6862   if (value == 0) {
6863     __ xorl(dest, dest);
6864   } else {
6865     __ movl(dest, Immediate(value));
6866   }
6867 }
6868 
6869 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
6870   if (value == 0) {
6871     // Clears upper bits too.
6872     __ xorl(dest, dest);
6873   } else if (IsUint<32>(value)) {
6874     // We can use a 32 bit move, as it will zero-extend and is shorter.
6875     __ movl(dest, Immediate(static_cast<int32_t>(value)));
6876   } else {
6877     __ movq(dest, Immediate(value));
6878   }
6879 }
6880 
6881 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
6882   if (value == 0) {
6883     __ xorps(dest, dest);
6884   } else {
6885     __ movss(dest, LiteralInt32Address(value));
6886   }
6887 }
6888 
6889 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
6890   if (value == 0) {
6891     __ xorpd(dest, dest);
6892   } else {
6893     __ movsd(dest, LiteralInt64Address(value));
6894   }
6895 }
6896 
6897 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
6898   Load32BitValue(dest, bit_cast<int32_t, float>(value));
6899 }
6900 
6901 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
6902   Load64BitValue(dest, bit_cast<int64_t, double>(value));
6903 }
6904 
6905 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
6906   if (value == 0) {
6907     __ testl(dest, dest);
6908   } else {
6909     __ cmpl(dest, Immediate(value));
6910   }
6911 }
6912 
6913 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
6914   if (IsInt<32>(value)) {
6915     if (value == 0) {
6916       __ testq(dest, dest);
6917     } else {
6918       __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
6919     }
6920   } else {
6921     // Value won't fit in a 32-bit immediate.
6922     __ cmpq(dest, LiteralInt64Address(value));
6923   }
6924 }
6925 
6926 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
6927   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
6928   GenerateIntCompare(lhs_reg, rhs);
6929 }
6930 
6931 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
6932   if (rhs.IsConstant()) {
6933     int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
6934     Compare32BitValue(lhs, value);
6935   } else if (rhs.IsStackSlot()) {
6936     __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
6937   } else {
6938     __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
6939   }
6940 }
6941 
6942 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
6943   CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
6944   if (rhs.IsConstant()) {
6945     int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
6946     Compare64BitValue(lhs_reg, value);
6947   } else if (rhs.IsDoubleStackSlot()) {
6948     __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
6949   } else {
6950     __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
6951   }
6952 }
6953 
6954 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
6955                                           Location index,
6956                                           ScaleFactor scale,
6957                                           uint32_t data_offset) {
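  // For instance, a constant index of 3 with TIMES_4 scaling and a 16-byte data
  // offset folds into the plain displacement 3 * 4 + 16 = 28, while a
  // non-constant index uses the hardware scaled-index addressing mode instead.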
6958   return index.IsConstant() ?
6959       Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
6960       Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
6961 }
6962 
6963 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
6964   DCHECK(dest.IsDoubleStackSlot());
6965   if (IsInt<32>(value)) {
6966     // Can move directly as an int32 constant.
6967     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
6968             Immediate(static_cast<int32_t>(value)));
6969   } else {
6970     Load64BitValue(CpuRegister(TMP), value);
6971     __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
6972   }
6973 }
6974 
6975 /**
6976  * Class to handle late fixup of offsets into constant area.
6977  */
6978 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
6979  public:
6980   RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
6981       : codegen_(&codegen), offset_into_constant_area_(offset) {}
6982 
6983  protected:
6984   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
6985 
6986   CodeGeneratorX86_64* codegen_;
6987 
6988  private:
6989   void Process(const MemoryRegion& region, int pos) OVERRIDE {
6990     // Patch the correct offset for the instruction.  We use the address of the
6991     // 'next' instruction, which is 'pos' (patch the 4 bytes before).
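    // Example with made-up numbers: if the constant area starts at code offset
    // 0x2000, this fixup refers to offset 8 within it, and the displacement ends
    // at pos == 0x100, then 0x2008 - 0x100 = 0x1f08 is stored at pos - 4 as the
    // RIP-relative displacement.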
6992     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
6993     int32_t relative_position = constant_offset - pos;
6994 
6995     // Patch in the right value.
6996     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
6997   }
6998 
6999   // Location in constant area that the fixup refers to.
7000   size_t offset_into_constant_area_;
7001 };
7002 
7003 /**
7004  * Class to handle late fixup of offsets to a jump table that will be created in the
7005  * constant area.
7006  */
7007 class JumpTableRIPFixup : public RIPFixup {
7008  public:
7009   JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
7010       : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
7011 
7012   void CreateJumpTable() {
7013     X86_64Assembler* assembler = codegen_->GetAssembler();
7014 
7015     // Ensure that the reference to the jump table has the correct offset.
7016     const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
7017     SetOffset(offset_in_constant_table);
7018 
7019     // Compute the offset from the start of the function to this jump table.
7020     const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
7021 
7022     // Populate the jump table with the offsets to each case's target block.
7023     int32_t num_entries = switch_instr_->GetNumEntries();
7024     HBasicBlock* block = switch_instr_->GetBlock();
7025     const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
7026     // The value that we want is the target offset - the position of the table.
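    // E.g. (illustrative numbers): with the table at function offset 0x400 and a
    // successor block bound at offset 0x120, the stored entry is
    // 0x120 - 0x400 = -0x2e0; VisitPackedSwitch adds it back to the table's
    // runtime address before jumping.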
7027     for (int32_t i = 0; i < num_entries; i++) {
7028       HBasicBlock* b = successors[i];
7029       Label* l = codegen_->GetLabelOf(b);
7030       DCHECK(l->IsBound());
7031       int32_t offset_to_block = l->Position() - current_table_offset;
7032       assembler->AppendInt32(offset_to_block);
7033     }
7034   }
7035 
7036  private:
7037   const HPackedSwitch* switch_instr_;
7038 };
7039 
7040 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
7041   // Generate the constant area if needed.
7042   X86_64Assembler* assembler = GetAssembler();
7043   if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
7044     // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
7045     assembler->Align(4, 0);
7046     constant_area_start_ = assembler->CodeSize();
7047 
7048     // Populate any jump tables.
7049     for (auto jump_table : fixups_to_jump_tables_) {
7050       jump_table->CreateJumpTable();
7051     }
7052 
7053     // And now add the constant area to the generated code.
7054     assembler->AddConstantArea();
7055   }
7056 
7057   // And finish up.
7058   CodeGenerator::Finalize(allocator);
7059 }
7060 
7061 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
7062   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
7063   return Address::RIP(fixup);
7064 }
7065 
7066 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
7067   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
7068   return Address::RIP(fixup);
7069 }
7070 
7071 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
7072   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
7073   return Address::RIP(fixup);
7074 }
7075 
7076 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
7077   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
7078   return Address::RIP(fixup);
7079 }
7080 
7081 // TODO: trg as memory.
7082 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) {
7083   if (!trg.IsValid()) {
7084     DCHECK_EQ(type, Primitive::kPrimVoid);
7085     return;
7086   }
7087 
7088   DCHECK_NE(type, Primitive::kPrimVoid);
7089 
7090   Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
7091   if (trg.Equals(return_loc)) {
7092     return;
7093   }
7094 
7095   // Let the parallel move resolver take care of all of this.
7096   HParallelMove parallel_move(GetGraph()->GetArena());
7097   parallel_move.AddMove(return_loc, trg, type, nullptr);
7098   GetMoveResolver()->EmitNativeCode(&parallel_move);
7099 }
7100 
7101 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
7102   // Create a fixup to be used to create and address the jump table.
7103   JumpTableRIPFixup* table_fixup =
7104       new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr);
7105 
7106   // We have to populate the jump tables.
7107   fixups_to_jump_tables_.push_back(table_fixup);
7108   return Address::RIP(table_fixup);
7109 }
7110 
7111 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
7112                                              const Address& addr_high,
7113                                              int64_t v,
7114                                              HInstruction* instruction) {
7115   if (IsInt<32>(v)) {
7116     int32_t v_32 = v;
7117     __ movq(addr_low, Immediate(v_32));
7118     MaybeRecordImplicitNullCheck(instruction);
7119   } else {
7120     // Doesn't fit in a sign-extended 32-bit immediate.  Do it in pieces.
7121     int32_t low_v = Low32Bits(v);
7122     int32_t high_v = High32Bits(v);
7123     __ movl(addr_low, Immediate(low_v));
7124     MaybeRecordImplicitNullCheck(instruction);
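    // Only the low-word store above can be the faulting access for a null object;
    // if it faults, the high-word store below is never reached, so only the first
    // store records the implicit null check.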
7125     __ movl(addr_high, Immediate(high_v));
7126   }
7127 }
7128 
7129 void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
7130                                           const uint8_t* roots_data,
7131                                           const PatchInfo<Label>& info,
7132                                           uint64_t index_in_table) const {
7133   uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
7134   uintptr_t address =
7135       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
7136   typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
7137   reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
7138      dchecked_integral_cast<uint32_t>(address);
7139 }
7140 
7141 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
7142   for (const PatchInfo<Label>& info : jit_string_patches_) {
7143     const auto& it = jit_string_roots_.find(
7144         StringReference(&info.dex_file, dex::StringIndex(info.index)));
7145     DCHECK(it != jit_string_roots_.end());
7146     PatchJitRootUse(code, roots_data, info, it->second);
7147   }
7148 
7149   for (const PatchInfo<Label>& info : jit_class_patches_) {
7150     const auto& it = jit_class_roots_.find(
7151         TypeReference(&info.dex_file, dex::TypeIndex(info.index)));
7152     DCHECK(it != jit_class_roots_.end());
7153     PatchJitRootUse(code, roots_data, info, it->second);
7154   }
7155 }
7156 
7157 #undef __
7158 
7159 }  // namespace x86_64
7160 }  // namespace art
7161